diff --git a/cranelift/codegen/src/isa/arm64/inst/args.rs b/cranelift/codegen/src/isa/arm64/inst/args.rs new file mode 100644 index 0000000000..75cf12283b --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/args.rs @@ -0,0 +1,501 @@ +//! ARM64 ISA definitions: instruction arguments. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::binemit::{CodeOffset, CodeSink}; +use crate::ir::constant::{ConstantData, ConstantOffset}; +use crate::ir::Type; +use crate::isa::arm64::inst::*; +use crate::machinst::*; + +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; + +use std::string::{String, ToString}; + +/// A shift operator for a register or immediate. +#[derive(Clone, Copy, Debug)] +pub enum ShiftOp { + ASR, + LSR, + LSL, + ROR, +} + +impl ShiftOp { + /// Get the encoding of this shift op. + pub fn bits(&self) -> u8 { + match self { + &ShiftOp::LSL => 0b00, + &ShiftOp::LSR => 0b01, + &ShiftOp::ASR => 0b10, + &ShiftOp::ROR => 0b11, + } + } +} + +/// A shift operator with an amount, guaranteed to be within range. +#[derive(Clone, Debug)] +pub struct ShiftOpAndAmt { + op: ShiftOp, + shift: ShiftOpShiftImm, +} + +/// A shift operator amount. +#[derive(Clone, Copy, Debug)] +pub struct ShiftOpShiftImm(u8); + +impl ShiftOpShiftImm { + /// Maximum shift for shifted-register operands. + pub const MAX_SHIFT: u64 = 63; + + /// Create a new shiftop shift amount, if possible. + pub fn maybe_from_shift(shift: u64) -> Option { + if shift <= Self::MAX_SHIFT { + Some(ShiftOpShiftImm(shift as u8)) + } else { + None + } + } + + /// Return the shift amount. + pub fn value(&self) -> u8 { + self.0 + } +} + +impl ShiftOpAndAmt { + pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt { + ShiftOpAndAmt { op, shift } + } + + /// Get the shift op. + pub fn op(&self) -> ShiftOp { + self.op.clone() + } + + /// Get the shift amount. + pub fn amt(&self) -> ShiftOpShiftImm { + self.shift + } +} + +/// An extend operator for a register. +#[derive(Clone, Copy, Debug)] +pub enum ExtendOp { + SXTB, + SXTH, + SXTW, + SXTX, + UXTB, + UXTH, + UXTW, + UXTX, +} + +impl ExtendOp { + /// Encoding of this op. + pub fn bits(&self) -> u8 { + match self { + &ExtendOp::UXTB => 0b000, + &ExtendOp::UXTH => 0b001, + &ExtendOp::UXTW => 0b010, + &ExtendOp::UXTX => 0b011, + &ExtendOp::SXTB => 0b100, + &ExtendOp::SXTH => 0b101, + &ExtendOp::SXTW => 0b110, + &ExtendOp::SXTX => 0b111, + } + } +} + +//============================================================================= +// Instruction sub-components (memory addresses): definitions + +/// A reference to some memory address. +#[derive(Clone, Debug)] +pub enum MemLabel { + /// An address in the code, a constant pool or jumptable, with relative + /// offset from this instruction. This form must be used at emission time; + /// see `memlabel_finalize()` for how other forms are lowered to this one. + PCRel(i32), +} + +/// A memory argument to load/store, encapsulating the possible addressing modes. +#[derive(Clone, Debug)] +pub enum MemArg { + Label(MemLabel), + PostIndexed(Writable, SImm9), + PreIndexed(Writable, SImm9), + // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to + // what the ISA calls the "register offset" addressing mode. We split out + // several options here for more ergonomic codegen. 
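+    // As a rough illustration (register and type choices below are arbitrary),
+    // the address each register-offset variant computes is:
+    //   RegReg(x0, x1)                       -> x0 + x1
+    //   RegScaled(x0, x1, I64)               -> x0 + (x1 << 3)
+    //   RegScaledExtended(x0, x1, I64, UXTW) -> x0 + (zero_extend_32(x1) << 3)
+    // i.e., `RegScaled` shifts the index by log2(size_of(ty)), and the extended
+    // form first sign- or zero-extends a 32-bit index.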
+ RegReg(Reg, Reg), + RegScaled(Reg, Reg, Type), + RegScaledExtended(Reg, Reg, Type, ExtendOp), + Unscaled(Reg, SImm9), + UnsignedOffset(Reg, UImm12Scaled), + /// Offset from the stack pointer or frame pointer. + SPOffset(i64), + FPOffset(i64), +} + +impl MemArg { + /// Memory reference using an address in a register. + pub fn reg(reg: Reg) -> MemArg { + // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur. + // This also does not use PostIndexed / PreIndexed as they update the register. + MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) + } + + /// Memory reference using an address in a register and an offset, if possible. + pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option { + if offset == 0 { + Some(MemArg::Unscaled(reg, SImm9::zero())) + } else if let Some(simm9) = SImm9::maybe_from_i64(offset) { + Some(MemArg::Unscaled(reg, simm9)) + } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) { + Some(MemArg::UnsignedOffset(reg, uimm12s)) + } else { + None + } + } + + /// Memory reference using the sum of two registers as an address. + pub fn reg_reg(reg1: Reg, reg2: Reg) -> MemArg { + MemArg::RegReg(reg1, reg2) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. + pub fn reg_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> MemArg { + MemArg::RegScaled(reg1, reg2, ty) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. + pub fn reg_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> MemArg { + MemArg::RegScaledExtended(reg1, reg2, ty, op) + } + + /// Memory reference to a label: a global function or value, or data in the constant pool. + pub fn label(label: MemLabel) -> MemArg { + MemArg::Label(label) + } +} + +/// A memory argument to a load/store-pair. +#[derive(Clone, Debug)] +pub enum PairMemArg { + SignedOffset(Reg, SImm7Scaled), + PreIndexed(Writable, SImm7Scaled), + PostIndexed(Writable, SImm7Scaled), +} + +//============================================================================= +// Instruction sub-components (conditions, branches and branch targets): +// definitions + +/// Condition for conditional branches. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Cond { + Eq, + Ne, + Hs, + Lo, + Mi, + Pl, + Vs, + Vc, + Hi, + Ls, + Ge, + Lt, + Gt, + Le, + Al, + Nv, +} + +impl Cond { + /// Return the inverted condition. + pub fn invert(self) -> Cond { + match self { + Cond::Eq => Cond::Ne, + Cond::Ne => Cond::Eq, + Cond::Hs => Cond::Lo, + Cond::Lo => Cond::Hs, + Cond::Mi => Cond::Pl, + Cond::Pl => Cond::Mi, + Cond::Vs => Cond::Vc, + Cond::Vc => Cond::Vs, + Cond::Hi => Cond::Ls, + Cond::Ls => Cond::Hi, + Cond::Ge => Cond::Lt, + Cond::Lt => Cond::Ge, + Cond::Gt => Cond::Le, + Cond::Le => Cond::Gt, + Cond::Al => Cond::Nv, + Cond::Nv => Cond::Al, + } + } + + /// Return the machine encoding of this condition. + pub fn bits(self) -> u32 { + match self { + Cond::Eq => 0, + Cond::Ne => 1, + Cond::Hs => 2, + Cond::Lo => 3, + Cond::Mi => 4, + Cond::Pl => 5, + Cond::Vs => 6, + Cond::Vc => 7, + Cond::Hi => 8, + Cond::Ls => 9, + Cond::Ge => 10, + Cond::Lt => 11, + Cond::Gt => 12, + Cond::Le => 13, + Cond::Al => 14, + Cond::Nv => 15, + } + } +} + +/// The kind of conditional branch: the common-case-optimized "reg-is-zero" / +/// "reg-is-nonzero" variants, or the generic one that tests the machine +/// condition codes. +#[derive(Clone, Copy, Debug)] +pub enum CondBrKind { + /// Condition: given register is zero. 
+ Zero(Reg), + /// Condition: given register is nonzero. + NotZero(Reg), + /// Condition: the given condition-code test is true. + Cond(Cond), +} + +impl CondBrKind { + /// Return the inverted branch condition. + pub fn invert(self) -> CondBrKind { + match self { + CondBrKind::Zero(reg) => CondBrKind::NotZero(reg), + CondBrKind::NotZero(reg) => CondBrKind::Zero(reg), + CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()), + } + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug)] +pub enum BranchTarget { + /// An unresolved reference to a BlockIndex, as passed into + /// `lower_branch_group()`. + Block(BlockIndex), + /// A resolved reference to another instruction, after + /// `Inst::with_block_offsets()`. + ResolvedOffset(isize), +} + +impl BranchTarget { + /// Lower the branch target given offsets of each block. + pub fn lower(&mut self, targets: &[CodeOffset], my_offset: CodeOffset) { + match self { + &mut BranchTarget::Block(bix) => { + let bix = bix as usize; + assert!(bix < targets.len()); + let block_offset_in_func = targets[bix]; + let branch_offset = (block_offset_in_func as isize) - (my_offset as isize); + *self = BranchTarget::ResolvedOffset(branch_offset); + } + &mut BranchTarget::ResolvedOffset(..) => {} + } + } + + /// Get the block index. + pub fn as_block_index(&self) -> Option { + match self { + &BranchTarget::Block(bix) => Some(bix), + _ => None, + } + } + + /// Get the offset as 4-byte words. Returns `0` if not + /// yet resolved (in that case, we're only computing + /// size and the offset doesn't matter). + pub fn as_offset_words(&self) -> isize { + match self { + &BranchTarget::ResolvedOffset(off) => off >> 2, + _ => 0, + } + } + + /// Get the offset as a 26-bit offset suitable for a 26-bit jump, or `None` if overflow. + pub fn as_off26(&self) -> Option { + let off = self.as_offset_words(); + if (off < (1 << 25)) && (off >= -(1 << 25)) { + Some((off as u32) & ((1 << 26) - 1)) + } else { + None + } + } + + /// Get the offset as a 16-bit offset, or `None` if overflow. + pub fn as_off19(&self) -> Option { + let off = self.as_offset_words(); + if (off < (1 << 18)) && (off >= -(1 << 18)) { + Some((off as u32) & ((1 << 19) - 1)) + } else { + None + } + } + + /// Map the block index given a transform map. 
+ pub fn map(&mut self, block_index_map: &[BlockIndex]) { + match self { + &mut BranchTarget::Block(ref mut bix) => { + let n = block_index_map[*bix as usize]; + *bix = n; + } + &mut BranchTarget::ResolvedOffset(_) => {} + } + } +} + +impl ShowWithRRU for ShiftOpAndAmt { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?} {}", self.op(), self.amt().value()) + } +} + +impl ShowWithRRU for ExtendOp { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?}", self) + } +} + +impl ShowWithRRU for MemLabel { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemLabel::PCRel(off) => format!("pc+{}", off), + } + } +} + +fn shift_for_type(ty: Type) -> usize { + match ty.bytes() { + 1 => 0, + 2 => 1, + 4 => 2, + 8 => 3, + 16 => 4, + _ => panic!("unknown type"), + } +} + +impl ShowWithRRU for MemArg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemArg::Unscaled(reg, simm9) => { + if simm9.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &MemArg::UnsignedOffset(reg, uimm12) => { + if uimm12.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &MemArg::RegReg(r1, r2) => { + format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) + } + &MemArg::RegScaled(r1, r2, ty) => { + let shift = shift_for_type(ty); + format!( + "[{}, {}, LSL #{}]", + r1.show_rru(mb_rru), + r2.show_rru(mb_rru), + shift, + ) + } + &MemArg::RegScaledExtended(r1, r2, ty, op) => { + let shift = shift_for_type(ty); + let is32 = match op { + ExtendOp::SXTW | ExtendOp::UXTW => true, + _ => false, + }; + let op = op.show_rru(mb_rru); + format!( + "[{}, {}, {} #{}]", + r1.show_rru(mb_rru), + show_ireg_sized(r2, mb_rru, is32), + op, + shift + ) + } + &MemArg::Label(ref label) => label.show_rru(mb_rru), + &MemArg::PreIndexed(r, simm9) => format!( + "[{}, {}]!", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + &MemArg::PostIndexed(r, simm9) => format!( + "[{}], {}", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + // Eliminated by `mem_finalize()`. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) 
=> { + panic!("Unexpected stack-offset mem-arg mode!") + } + } + } +} + +impl ShowWithRRU for PairMemArg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &PairMemArg::SignedOffset(reg, simm7) => { + if simm7.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &PairMemArg::PreIndexed(reg, simm7) => format!( + "[{}, {}]!", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + &PairMemArg::PostIndexed(reg, simm7) => format!( + "[{}], {}", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + } + } +} + +impl ShowWithRRU for Cond { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let mut s = format!("{:?}", self); + s.make_ascii_lowercase(); + s + } +} + +impl ShowWithRRU for BranchTarget { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &BranchTarget::Block(block) => format!("block{}", block), + &BranchTarget::ResolvedOffset(off) => format!("{}", off), + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/emit.rs b/cranelift/codegen/src/isa/arm64/inst/emit.rs new file mode 100644 index 0000000000..20eefdeaae --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/emit.rs @@ -0,0 +1,4106 @@ +//! ARM64 ISA: binary code emission. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::binemit::{CodeOffset, CodeSink, Reloc}; +use crate::ir::constant::ConstantData; +use crate::ir::types::*; +use crate::ir::{Opcode, TrapCode, Type}; +use crate::isa::arm64::inst::*; +use crate::machinst::*; +use cranelift_entity::EntityRef; + +use std::env; + +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; + +use alloc::vec::Vec; + +/// Memory label/reference finalization: convert a MemLabel to a PC-relative +/// offset, possibly emitting relocation(s) as necessary. +pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { + match label { + &MemLabel::PCRel(rel) => rel, + } +} + +/// Memory addressing mode finalization: convert "special" modes (e.g., +/// generic arbitrary stack offset) into real addressing modes, possibly by +/// emitting some helper instructions that come immediately before the use +/// of this amode. +pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec, MemArg) { + match mem { + &MemArg::SPOffset(off) | &MemArg::FPOffset(off) => { + let basereg = match mem { + &MemArg::SPOffset(..) => stack_reg(), + &MemArg::FPOffset(..) => fp_reg(), + _ => unreachable!(), + }; + if let Some(simm9) = SImm9::maybe_from_i64(off) { + let mem = MemArg::Unscaled(basereg, simm9); + (vec![], mem) + } else { + let tmp = writable_spilltmp_reg(); + let mut const_insts = Inst::load_constant(tmp, off as u64); + let add_inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: tmp, + rn: tmp.to_reg(), + rm: basereg, + }; + const_insts.push(add_inst); + (const_insts.to_vec(), MemArg::reg(tmp.to_reg())) + } + } + &MemArg::Label(ref label) => { + let off = memlabel_finalize(insn_off, label); + (vec![], MemArg::Label(MemLabel::PCRel(off))) + } + _ => (vec![], mem.clone()), + } +} + +/// Helper: get a ConstantData from a u64. 
+pub fn u64_constant(bits: u64) -> ConstantData { + let data = [ + (bits & 0xff) as u8, + ((bits >> 8) & 0xff) as u8, + ((bits >> 16) & 0xff) as u8, + ((bits >> 24) & 0xff) as u8, + ((bits >> 32) & 0xff) as u8, + ((bits >> 40) & 0xff) as u8, + ((bits >> 48) & 0xff) as u8, + ((bits >> 56) & 0xff) as u8, + ]; + ConstantData::from(&data[..]) +} + +//============================================================================= +// Instructions and subcomponents: emission + +fn machreg_to_gpr(m: Reg) -> u32 { + assert!(m.get_class() == RegClass::I64); + assert!(m.is_real()); + m.to_real_reg().get_hw_encoding() as u32 +} + +fn machreg_to_vec(m: Reg) -> u32 { + assert!(m.get_class() == RegClass::V128); + assert!(m.is_real()); + m.to_real_reg().get_hw_encoding() as u32 +} + +fn machreg_to_gpr_or_vec(m: Reg) -> u32 { + m.to_real_reg().get_hw_encoding() as u32 +} + +fn enc_arith_rrr(bits_31_21: u16, bits_15_10: u8, rd: Writable, rn: Reg, rm: Reg) -> u32 { + ((bits_31_21 as u32) << 21) + | ((bits_15_10 as u32) << 10) + | machreg_to_gpr(rd.to_reg()) + | (machreg_to_gpr(rn) << 5) + | (machreg_to_gpr(rm) << 16) +} + +fn enc_arith_rr_imm12(bits_31_24: u8, immshift: u8, imm12: u16, rn: Reg, rd: Writable) -> u32 { + ((bits_31_24 as u32) << 24) + | ((immshift as u32) << 22) + | ((imm12 as u32) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rr_imml(bits_31_23: u16, imm_bits: u16, rn: Reg, rd: Writable) -> u32 { + ((bits_31_23 as u32) << 23) + | ((imm_bits as u32) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_gpr(rm) << 16) + | (bit15 << 15) + | (machreg_to_gpr(ra) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 { + assert!(off_26_0 < (1 << 26)); + (op_31_26 << 26) | off_26_0 +} + +fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 { + assert!(off_18_0 < (1 << 19)); + (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg) +} + +fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { + assert!(off_18_0 < (1 << 19)); + assert!(cond < (1 << 4)); + (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond +} + +const MOVE_WIDE_FIXED: u32 = 0x92800000; + +#[repr(u32)] +enum MoveWideOpcode { + MOVN = 0b00, + MOVZ = 0b10, + MOVK = 0b11, +} + +fn enc_move_wide(op: MoveWideOpcode, rd: Writable, imm: MoveWideConst) -> u32 { + assert!(imm.shift <= 0b11); + MOVE_WIDE_FIXED + | (op as u32) << 29 + | (imm.shift as u32) << 21 + | (imm.bits as u32) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { + (op_31_22 << 22) + | (simm7.bits() << 15) + | (machreg_to_gpr(rt2) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + +fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (simm9.bits() << 12) + | (op_11_10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (0b1 << 24) + | (uimm12.bits() << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_reg( + op_31_22: u32, + rn: Reg, + rm: Reg, + s_bit: bool, + extendop: Option, + rd: Reg, +) -> u32 { + let s_bit = if s_bit { 1 } else { 0 }; + let extend_bits = match extendop { + 
Some(ExtendOp::UXTW) => 0b010, + Some(ExtendOp::SXTW) => 0b110, + Some(ExtendOp::SXTX) => 0b111, + None => 0b011, /* LSL */ + _ => panic!("bad extend mode for ld/st MemArg"), + }; + (op_31_22 << 22) + | (1 << 21) + | (machreg_to_gpr(rm) << 16) + | (extend_bits << 13) + | (s_bit << 12) + | (0b10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { + (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) +} + +fn enc_extend(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable) -> u32 { + (top11 << 21) + | (machreg_to_vec(rm) << 16) + | (bit15_10 << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable) -> u32 { + (0b01011010110 << 21) + | size << 31 + | opcode2 << 16 + | opcode1 << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_br(rn: Reg) -> u32 { + 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) +} + +fn enc_adr(off: i32, rd: Writable) -> u32 { + let off = off as u32; + let immlo = off & 3; + let immhi = (off >> 2) & ((1 << 19) - 1); + (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_csel(rd: Writable, rn: Reg, rm: Reg, cond: Cond) -> u32 { + 0b100_11010100_00000_0000_00_00000_00000 + | (machreg_to_gpr(rm) << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) + | (cond.bits() << 12) +} + +fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, is32: bool) -> u32 { + let ty_bit = if is32 { 0 } else { 1 }; + 0b000_11110_00_1_00000_0000_11_00000_00000 + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) + | (cond.bits() << 12) + | (ty_bit << 22) +} + +fn enc_cset(rd: Writable, cond: Cond) -> u32 { + 0b100_11010100_11111_0000_01_11111_00000 + | machreg_to_gpr(rd.to_reg()) + | (cond.invert().bits() << 12) +} + +fn enc_vecmov(is_16b: bool, rd: Writable, rn: Reg) -> u32 { + debug_assert!(!is_16b); // to be supported later. 
+ 0b00001110_101_00000_00011_1_00000_00000 + | machreg_to_vec(rd.to_reg()) + | (machreg_to_vec(rn) << 16) + | (machreg_to_vec(rn) << 5) +} + +fn enc_fpurr(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrr(top22: u32, rd: Writable, rn: Reg, rm: Reg) -> u32 { + (top22 << 10) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrrr(top17: u32, rd: Writable, rn: Reg, rm: Reg, ra: Reg) -> u32 { + (top17 << 15) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(ra) << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fcmp(is32: bool, rn: Reg, rm: Reg) -> u32 { + let bits = if is32 { + 0b000_11110_00_1_00000_00_1000_00000_00000 + } else { + 0b000_11110_01_1_00000_00_1000_00000_00000 + }; + bits | (machreg_to_vec(rm) << 16) | (machreg_to_vec(rn) << 5) +} + +fn enc_fputoint(top16: u32, rd: Writable, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_inttofpu(top16: u32, rd: Writable, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fround(top22: u32, rd: Writable, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +impl MachInstEmit for Inst { + fn emit(&self, sink: &mut O) { + match self { + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let top11 = match alu_op { + ALUOp::Add32 => 0b00001011_000, + ALUOp::Add64 => 0b10001011_000, + ALUOp::Sub32 => 0b01001011_000, + ALUOp::Sub64 => 0b11001011_000, + ALUOp::Orr32 => 0b00101010_000, + ALUOp::Orr64 => 0b10101010_000, + ALUOp::And32 => 0b00001010_000, + ALUOp::And64 => 0b10001010_000, + ALUOp::Eor32 => 0b01001010_000, + ALUOp::Eor64 => 0b11001010_000, + ALUOp::OrrNot32 => 0b00101010_001, + ALUOp::OrrNot64 => 0b10101010_001, + ALUOp::AndNot32 => 0b00001010_001, + ALUOp::AndNot64 => 0b10001010_001, + ALUOp::EorNot32 => 0b01001010_001, + ALUOp::EorNot64 => 0b11001010_001, + ALUOp::AddS32 => 0b00101011_000, + ALUOp::AddS64 => 0b10101011_000, + ALUOp::SubS32 => 0b01101011_000, + ALUOp::SubS64 => 0b11101011_000, + ALUOp::SDiv64 => 0b10011010_110, + ALUOp::UDiv64 => 0b10011010_110, + ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110, + ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110, + + ALUOp::MAdd32 + | ALUOp::MAdd64 + | ALUOp::MSub32 + | ALUOp::MSub64 + | ALUOp::SMulH + | ALUOp::UMulH => { + //// RRRR ops. 
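+ // These four-operand forms are emitted via the `Inst::AluRRRR` arm below.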
+ panic!("Bad ALUOp in RRR form!"); + } + }; + let bit15_10 = match alu_op { + ALUOp::SDiv64 => 0b000011, + ALUOp::UDiv64 => 0b000010, + ALUOp::RotR32 | ALUOp::RotR64 => 0b001011, + ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001, + ALUOp::Asr32 | ALUOp::Asr64 => 0b001010, + ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000, + _ => 0b000000, + }; + assert_ne!(writable_stack_reg(), rd); + sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm)); + } + &Inst::AluRRRR { + alu_op, + rd, + rm, + rn, + ra, + } => { + let (top11, bit15) = match alu_op { + ALUOp::MAdd32 => (0b0_00_11011_000, 0), + ALUOp::MSub32 => (0b0_00_11011_000, 1), + ALUOp::MAdd64 => (0b1_00_11011_000, 0), + ALUOp::MSub64 => (0b1_00_11011_000, 1), + ALUOp::SMulH => (0b1_00_11011_010, 0), + ALUOp::UMulH => (0b1_00_11011_110, 0), + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let top8 = match alu_op { + ALUOp::Add32 => 0b000_10001, + ALUOp::Add64 => 0b100_10001, + ALUOp::Sub32 => 0b010_10001, + ALUOp::Sub64 => 0b110_10001, + ALUOp::AddS32 => 0b001_10001, + ALUOp::AddS64 => 0b101_10001, + ALUOp::SubS32 => 0b011_10001, + ALUOp::SubS64 => 0b111_10001, + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4(enc_arith_rr_imm12( + top8, + imm12.shift_bits(), + imm12.imm_bits(), + rn, + rd, + )); + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (top9, inv) = match alu_op { + ALUOp::Orr32 => (0b001_100100, false), + ALUOp::Orr64 => (0b101_100100, false), + ALUOp::And32 => (0b000_100100, false), + ALUOp::And64 => (0b100_100100, false), + ALUOp::Eor32 => (0b010_100100, false), + ALUOp::Eor64 => (0b110_100100, false), + ALUOp::OrrNot32 => (0b001_100100, true), + ALUOp::OrrNot64 => (0b101_100100, true), + ALUOp::AndNot32 => (0b000_100100, true), + ALUOp::AndNot64 => (0b100_100100, true), + ALUOp::EorNot32 => (0b010_100100, true), + ALUOp::EorNot64 => (0b110_100100, true), + _ => unimplemented!("{:?}", alu_op), + }; + let imml = if inv { imml.invert() } else { imml.clone() }; + sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd)); + } + + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let amt = immshift.value(); + let (top10, immr, imms) = match alu_op { + ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), amt as u32), + ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), amt as u32), + ALUOp::Lsr32 => (0b0101001100, amt as u32, 0b011111), + ALUOp::Lsr64 => (0b1101001101, amt as u32, 0b111111), + ALUOp::Asr32 => (0b0001001100, amt as u32, 0b011111), + ALUOp::Asr64 => (0b1001001101, amt as u32, 0b111111), + ALUOp::Lsl32 => (0b0101001100, (32 - amt) as u32, (31 - amt) as u32), + ALUOp::Lsl64 => (0b1101001101, (64 - amt) as u32, (63 - amt) as u32), + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4( + (top10 << 22) + | (immr << 16) + | (imms << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let top11: u16 = match alu_op { + ALUOp::Add32 => 0b000_01011000, + ALUOp::Add64 => 0b100_01011000, + ALUOp::AddS32 => 0b001_01011000, + ALUOp::AddS64 => 0b101_01011000, + ALUOp::Sub32 => 0b010_01011000, + ALUOp::Sub64 => 0b110_01011000, + ALUOp::SubS32 => 0b011_01011000, + ALUOp::SubS64 => 0b111_01011000, + ALUOp::Orr32 => 0b001_01010000, + ALUOp::Orr64 => 0b101_01010000, + ALUOp::And32 => 0b000_01010000, + ALUOp::And64 => 0b100_01010000, + ALUOp::Eor32 => 0b010_01010000, + 
ALUOp::Eor64 => 0b110_01010000, + ALUOp::OrrNot32 => 0b001_01010001, + ALUOp::OrrNot64 => 0b101_01010001, + ALUOp::EorNot32 => 0b010_01010001, + ALUOp::EorNot64 => 0b110_01010001, + ALUOp::AndNot32 => 0b000_01010001, + ALUOp::AndNot64 => 0b100_01010001, + _ => unimplemented!("{:?}", alu_op), + }; + let top11 = top11 | ((shiftop.op().bits() as u16) << 1); + let bits_15_10 = shiftop.amt().value(); + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + extendop, + } => { + let top11 = match alu_op { + ALUOp::Add32 => 0b00001011001, + ALUOp::Add64 => 0b10001011001, + ALUOp::Sub32 => 0b01001011001, + ALUOp::Sub64 => 0b11001011001, + ALUOp::AddS32 => 0b00101011001, + ALUOp::AddS64 => 0b10101011001, + ALUOp::SubS32 => 0b01101011001, + ALUOp::SubS64 => 0b11101011001, + _ => unimplemented!("{:?}", alu_op), + }; + let bits_15_10 = extendop.bits() << 3; + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::BitRR { op, rd, rn, .. } => { + let size = if op.is_32_bit() { 0b0 } else { 0b1 }; + let (op1, op2) = match op { + BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000), + BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100), + BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101), + }; + sink.put4(enc_bit_rr(size, op1, op2, rn, rd)) + } + + &Inst::ULoad8 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad8 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad16 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad16 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::SLoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::ULoad64 { + rd, + ref mem, + srcloc, + .. + } + | &Inst::FpuLoad32 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuLoad64 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuLoad128 { + rd, + ref mem, + srcloc, + } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + + for inst in mem_insts.into_iter() { + inst.emit(sink); + } + + // ldst encoding helpers take Reg, not Writable. + let rd = rd.to_reg(); + + // This is the base opcode (top 10 bits) for the "unscaled + // immediate" form (Unscaled). Other addressing modes will OR in + // other values for bits 24/25 (bits 1/2 of this constant). + let op = match self { + &Inst::ULoad8 { .. } => 0b0011100001, + &Inst::SLoad8 { .. } => 0b0011100010, + &Inst::ULoad16 { .. } => 0b0111100001, + &Inst::SLoad16 { .. } => 0b0111100010, + &Inst::ULoad32 { .. } => 0b1011100001, + &Inst::SLoad32 { .. } => 0b1011100010, + &Inst::ULoad64 { .. } => 0b1111100001, + &Inst::FpuLoad32 { .. } => 0b1011110001, + &Inst::FpuLoad64 { .. } => 0b1111110001, + &Inst::FpuLoad128 { .. } => 0b0011110011, + _ => unreachable!(), + }; + + if let Some(srcloc) = srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::OutOfBounds); + } + + match &mem { + &MemArg::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &MemArg::UnsignedOffset(reg, uimm12scaled) => { + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &MemArg::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => { + match (ty, self) { + (I8, &Inst::ULoad8 { .. }) => {} + (I8, &Inst::SLoad8 { .. }) => {} + (I16, &Inst::ULoad16 { .. }) => {} + (I16, &Inst::SLoad16 { .. }) => {} + (I32, &Inst::ULoad32 { .. 
}) => {} + (I32, &Inst::SLoad32 { .. }) => {} + (I64, &Inst::ULoad64 { .. }) => {} + (F32, &Inst::FpuLoad32 { .. }) => {} + (F64, &Inst::FpuLoad64 { .. }) => {} + (I128, &Inst::FpuLoad128 { .. }) => {} + _ => panic!("Mismatching reg-scaling type in MemArg"), + } + let extendop = match &mem { + &MemArg::RegScaled(..) => None, + &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &MemArg::Label(ref label) => { + let offset = match label { + &MemLabel::PCRel(off) => off as u32, + } / 4; + assert!(offset < (1 << 19)); + match self { + &Inst::ULoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011000, offset, rd)); + } + &Inst::SLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b10011000, offset, rd)); + } + &Inst::FpuLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011100, offset, rd)); + } + &Inst::ULoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011000, offset, rd)); + } + &Inst::FpuLoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011100, offset, rd)); + } + &Inst::FpuLoad128 { .. } => { + sink.put4(enc_ldst_imm19(0b10011100, offset, rd)); + } + _ => panic!("Unspported size for LDR from constant pool!"), + } + } + &MemArg::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &MemArg::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { + panic!("Should not see stack-offset here!") + } + } + } + + &Inst::Store8 { + rd, + ref mem, + srcloc, + } + | &Inst::Store16 { + rd, + ref mem, + srcloc, + } + | &Inst::Store32 { + rd, + ref mem, + srcloc, + } + | &Inst::Store64 { + rd, + ref mem, + srcloc, + .. + } + | &Inst::FpuStore32 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuStore64 { + rd, + ref mem, + srcloc, + } + | &Inst::FpuStore128 { + rd, + ref mem, + srcloc, + } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + + for inst in mem_insts.into_iter() { + inst.emit(sink); + } + + let op = match self { + &Inst::Store8 { .. } => 0b0011100000, + &Inst::Store16 { .. } => 0b0111100000, + &Inst::Store32 { .. } => 0b1011100000, + &Inst::Store64 { .. } => 0b1111100000, + &Inst::FpuStore32 { .. } => 0b1011110000, + &Inst::FpuStore64 { .. } => 0b1111110000, + &Inst::FpuStore128 { .. } => 0b0011110010, + _ => unreachable!(), + }; + + if let Some(srcloc) = srcloc { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::OutOfBounds); + } + + match &mem { + &MemArg::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &MemArg::UnsignedOffset(reg, uimm12scaled) => { + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &MemArg::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &MemArg::RegScaled(r1, r2, _ty) + | &MemArg::RegScaledExtended(r1, r2, _ty, _) => { + let extendop = match &mem { + &MemArg::RegScaled(..) => None, + &MemArg::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &MemArg::Label(..) 
=> { + panic!("Store to a MemLabel not implemented!"); + } + &MemArg::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &MemArg::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { + panic!("Should not see stack-offset here!") + } + } + } + + &Inst::StoreP64 { rt, rt2, ref mem } => match mem { + &PairMemArg::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); + } + &PairMemArg::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); + } + &PairMemArg::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); + } + }, + &Inst::LoadP64 { rt, rt2, ref mem } => { + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + match mem { + &PairMemArg::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); + } + &PairMemArg::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); + } + &PairMemArg::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); + } + } + } + &Inst::Mov { rd, rm } => { + assert!(rd.to_reg().get_class() == rm.get_class()); + assert!(rm.get_class() == RegClass::I64); + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); + } + &Inst::Mov32 { rd, rm } => { + // Encoded as ORR rd, rm, zero. 
+ sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); + } + &Inst::MovZ { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm)), + &Inst::MovN { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm)), + &Inst::MovK { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm)), + &Inst::CSel { rd, rn, rm, cond } => { + sink.put4(enc_csel(rd, rn, rm, cond)); + } + &Inst::CSet { rd, cond } => { + sink.put4(enc_cset(rd, cond)); + } + &Inst::FpuMove64 { rd, rn } => { + sink.put4(enc_vecmov(/* 16b = */ false, rd, rn)); + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let top22 = match fpu_op { + FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, + FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, + FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000, + FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000, + FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000, + FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000, + FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000, + FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000, + }; + sink.put4(enc_fpurr(top22, rd, rn)); + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let top22 = match fpu_op { + FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, + FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, + FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110, + FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110, + FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010, + FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010, + FPUOp2::Div32 => 0b000_11110_00_1_00000_000110, + FPUOp2::Div64 => 0b000_11110_01_1_00000_000110, + FPUOp2::Max32 => 0b000_11110_00_1_00000_010010, + FPUOp2::Max64 => 0b000_11110_01_1_00000_010010, + FPUOp2::Min32 => 0b000_11110_00_1_00000_010110, + FPUOp2::Min64 => 0b000_11110_01_1_00000_010110, + }; + sink.put4(enc_fpurrr(top22, rd, rn, rm)); + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let top17 = match fpu_op { + FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0, + FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0, + }; + sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); + } + &Inst::FpuCmp32 { rn, rm } => { + sink.put4(enc_fcmp(/* is32 = */ true, rn, rm)); + } + &Inst::FpuCmp64 { rn, rm } => { + sink.put4(enc_fcmp(/* is32 = */ false, rn, rm)); + } + &Inst::FpuToInt { op, rd, rn } => { + let top16 = match op { + // FCVTZS (32/32-bit) + FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, + // FCVTZU (32/32-bit) + FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001, + // FCVTZS (32/64-bit) + FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000, + // FCVTZU (32/64-bit) + FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001, + // FCVTZS (64/32-bit) + FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000, + // FCVTZU (64/32-bit) + FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001, + // FCVTZS (64/64-bit) + FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000, + // FCVTZU (64/64-bit) + FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001, + }; + sink.put4(enc_fputoint(top16, rd, rn)); + } + &Inst::IntToFpu { op, rd, rn } => { + let top16 = match op { + // SCVTF (32/32-bit) + IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, + // UCVTF (32/32-bit) + IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011, + // SCVTF (64/32-bit) + IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010, + // UCVTF (64/32-bit) + IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011, + // SCVTF (32/64-bit) + IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010, + // UCVTF (32/64-bit) + IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011, + // SCVTF (64/64-bit) + IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010, + // 
UCVTF (64/64-bit) + IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011, + }; + sink.put4(enc_inttofpu(top16, rd, rn)); + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let inst = Inst::FpuLoad32 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(8), + }; + inst.emit(sink); + sink.put4(const_data.to_bits()); + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let inst = Inst::FpuLoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.put8(const_data.to_bits()); + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ true)); + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, /* is32 = */ false)); + } + &Inst::FpuRound { op, rd, rn } => { + let top22 = match op { + FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, + FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, + FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000, + FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000, + FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000, + FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000, + FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000, + FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000, + }; + sink.put4(enc_fround(top22, rd, rn)); + } + &Inst::MovToVec64 { rd, rn } => { + sink.put4( + 0b010_01110000_01000_0_0011_1_00000_00000 + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::MovFromVec64 { rd, rn } => { + sink.put4( + 0b010_01110000_01000_0_0111_1_00000_00000 + | (machreg_to_vec(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::VecRRR { rd, rn, rm, alu_op } => { + let (top11, bit15_10) = match alu_op { + VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011), + VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011), + VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011), + VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011), + }; + sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); + } + &Inst::MovToNZCV { rn } => { + sink.put4(0xd51b4200 | machreg_to_gpr(rn)); + } + &Inst::MovFromNZCV { rd } => { + sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); + } + &Inst::CondSet { rd, cond } => { + sink.put4( + 0b100_11010100_11111_0000_01_11111_00000 + | (cond.invert().bits() << 12) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + let top22 = match (signed, from_bits, to_bits) { + (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32) + (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32) + (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32) + (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32) + // The 64-bit unsigned variants are the same as the 32-bit ones, + // because writes to Wn zero out the top 32 bits of Xn + (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64) + (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64) + (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64) + (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64) + // 32-to-64: the unsigned case is a 'mov' (special-cased below). 
+ (false, 32, 64) => 0, // MOV + (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64) + _ => panic!( + "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}", + signed, from_bits, to_bits + ), + }; + if top22 != 0 { + sink.put4(enc_extend(top22, rd, rn)); + } else { + Inst::mov32(rd, rn).emit(sink); + } + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + assert!(to_bits <= 64); + // Reduce sign-extend-from-1-bit to: + // - and rd, rn, #1 + // - sub rd, zr, rd + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + let sub_inst = Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd, + rn: zero_reg(), + rm: rd.to_reg(), + }; + sub_inst.emit(sink); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && !signed => { + assert!(to_bits <= 64); + // Reduce zero-extend-from-1-bit to: + // - and rd, rn, #1 + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + } + &Inst::Extend { .. } => { + panic!("Unsupported extend variant"); + } + &Inst::Jump { ref dest } => { + // TODO: differentiate between as_off26() returning `None` for + // out-of-range vs. not-yet-finalized. The latter happens when we + // do early (fake) emission for size computation. + sink.put4(enc_jump26(0b000101, dest.as_off26().unwrap())); + } + &Inst::Ret {} => { + sink.put4(0xd65f03c0); + } + &Inst::EpiloguePlaceholder {} => { + // Noop; this is just a placeholder for epilogues. + } + &Inst::Call { + ref dest, + loc, + opcode, + .. + } => { + sink.add_reloc(loc, Reloc::Arm64Call, dest, 0); + sink.put4(enc_jump26(0b100101, 0)); + if opcode.is_call() { + sink.add_call_site(loc, opcode); + } + } + &Inst::CallInd { + rn, loc, opcode, .. + } => { + sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); + if opcode.is_call() { + sink.add_call_site(loc, opcode); + } + } + &Inst::CondBr { .. } => panic!("Unlowered CondBr during binemit!"), + &Inst::CondBrLowered { target, kind } => match kind { + // TODO: handle >2^19 case by emitting a compound sequence with + // an unconditional (26-bit) branch. We need branch-relaxation + // adjustment machinery to enable this (because we don't want to + // always emit the long form). + CondBrKind::Zero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_0, target.as_off19().unwrap(), reg)); + } + CondBrKind::NotZero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_1, target.as_off19().unwrap(), reg)); + } + CondBrKind::Cond(c) => { + sink.put4(enc_cbr( + 0b01010100, + target.as_off19().unwrap_or(0), + 0b0, + c.bits(), + )); + } + }, + &Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + } => { + // Conditional part first. + match kind { + CondBrKind::Zero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_0, taken.as_off19().unwrap(), reg)); + } + CondBrKind::NotZero(reg) => { + sink.put4(enc_cmpbr(0b1_011010_1, taken.as_off19().unwrap(), reg)); + } + CondBrKind::Cond(c) => { + sink.put4(enc_cbr( + 0b01010100, + taken.as_off19().unwrap_or(0), + 0b0, + c.bits(), + )); + } + } + // Unconditional part. + sink.put4(enc_jump26(0b000101, not_taken.as_off26().unwrap_or(0))); + } + &Inst::IndirectBr { rn, .. 
} => { + sink.put4(enc_br(rn)); + } + &Inst::Nop => {} + &Inst::Nop4 => { + sink.put4(0xd503201f); + } + &Inst::Brk => { + sink.put4(0xd4200000); + } + &Inst::Udf { trap_info } => { + let (srcloc, code) = trap_info; + sink.add_trap(srcloc, code); + sink.put4(0xd4a00000); + } + &Inst::Adr { rd, ref label } => { + let off = memlabel_finalize(sink.cur_offset_from_start(), label); + assert!(off > -(1 << 20)); + assert!(off < (1 << 20)); + sink.put4(enc_adr(off, rd)); + } + &Inst::Word4 { data } => { + sink.put4(data); + } + &Inst::Word8 { data } => { + sink.put8(data); + } + &Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref targets, + .. + } => { + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + // + // N.B.: if PC-rel addressing on ADR below is changed, also update + // `Inst::with_block_offsets()` in arm64/inst/mod.rs. + + // Save index in a tmp (the live range of ridx only goes to start of this + // sequence; rtmp1 or rtmp2 may overwrite it). + let inst = Inst::gen_move(rtmp2, ridx, I64); + inst.emit(sink); + // Load address of jump table + let inst = Inst::Adr { + rd: rtmp1, + label: MemLabel::PCRel(16), + }; + inst.emit(sink); + // Load value out of jump table + let inst = Inst::SLoad32 { + rd: rtmp2, + mem: MemArg::reg_reg_scaled_extended( + rtmp1.to_reg(), + rtmp2.to_reg(), + I32, + ExtendOp::UXTW, + ), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + // Add base of jump table to jump-table-sourced block offset + let inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: rtmp1, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + }; + inst.emit(sink); + // Branch to computed address. (`targets` here is only used for successor queries + // and is not needed for emission.) + let inst = Inst::IndirectBr { + rn: rtmp1.to_reg(), + targets: vec![], + }; + inst.emit(sink); + // Emit jump table (table of 32-bit offsets). + for target in targets { + let off = target.as_offset_words() * 4; + let off = off as i32 as u32; + sink.put4(off); + } + } + &Inst::LoadConst64 { rd, const_data } => { + let inst = Inst::ULoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.put8(const_data); + } + &Inst::LoadExtName { + rd, + ref name, + offset, + srcloc, + } => { + let inst = Inst::ULoad64 { + rd, + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, // can't cause a user trap. + }; + inst.emit(sink); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink); + sink.add_reloc(srcloc, Reloc::Abs8, name, offset); + sink.put8(0); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::isa::test_utils; + + #[test] + fn test_arm64_binemit() { + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + // N.B.: the architecture is little-endian, so when transcribing the 32-bit + // hex instructions from e.g. objdump disassembly, one must swap the bytes + // seen below. (E.g., a `ret` is normally written as the u32 `D65F03C0`, + // but we write it here as C0035FD6.) 
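+    // As a quick sanity check of that byte order (an illustrative sketch; this
+    // helper is hypothetical and not defined in this file), the expected hex
+    // strings can be derived from the 32-bit instruction word like so:
+    //
+    //     fn enc_hex(word: u32) -> String {
+    //         word.to_le_bytes().iter().map(|b| format!("{:02X}", b)).collect()
+    //     }
+    //     assert_eq!(enc_hex(0xD65F03C0), "C0035FD6"); // `ret`
+    //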
+ + // Useful helper script to produce the encodings from the text: + // + // #!/bin/sh + // tmp=`mktemp /tmp/XXXXXXXX.o` + // aarch64-linux-gnu-as /dev/stdin -o $tmp + // aarch64-linux-gnu-objdump -d $tmp + // rm -f $tmp + // + // Then: + // + // $ echo "mov x1, x2" | arm64inst.sh + insns.push((Inst::Ret {}, "C0035FD6", "ret")); + insns.push((Inst::Nop {}, "", "nop-zero-len")); + insns.push((Inst::Nop4 {}, "1F2003D5", "nop")); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030B", + "add w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068B", + "add x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100034B", + "sub w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CB", + "sub x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032A", + "orr w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AA", + "orr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030A", + "and w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068A", + "and x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100036B", + "subs w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006EB", + "subs x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032B", + "adds w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AB", + "adds x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40CC69A", + "sdiv x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::UDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A408C69A", + "udiv x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400064A", + "eor w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CA", + "eor x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400260A", + "bic w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400268A", + "bic x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400262A", + "orn w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026AA", + "orn x4, x5, x6", + 
)); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400264A", + "eon w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026CA", + "eon x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC61A", + "ror w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC69A", + "ror x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C61A", + "lsr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C69A", + "lsr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C61A", + "asr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C69A", + "asr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C61A", + "lsl w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C69A", + "lsl x4, x5, x6", + )); + + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0411", + "add w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: true, + }, + }, + "078D4411", + "add w7, w8, #1191936", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0491", + "add x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0451", + "sub w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04D1", + "sub x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0471", + "subs w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04F1", + "subs x7, x8, #291", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + rm: xreg(9), + extendop: ExtendOp::SXTB, + }, + "0781290B", + "add w7, w8, w9, SXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add64, + rd: writable_xreg(15), + rn: xreg(16), + rm: xreg(17), + extendop: ExtendOp::UXTB, + }, + "0F02318B", + "add x15, x16, x17, UXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + extendop: ExtendOp::SXTH, + }, + "41A0234B", + "sub 
w1, w2, w3, SXTH", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub64, + rd: writable_xreg(20), + rn: xreg(21), + rm: xreg(22), + extendop: ExtendOp::UXTW, + }, + "B44236CB", + "sub x20, x21, x22, UXTW", + )); + + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(20).unwrap(), + ), + }, + "6A510C0B", + "add w10, w11, w12, LSL 20", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::ASR, + ShiftOpShiftImm::maybe_from_shift(42).unwrap(), + ), + }, + "6AA98C8B", + "add x10, x11, x12, ASR 42", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4B", + "sub w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCB", + "sub x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2A", + "orr w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAA", + "orr x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C0A", + "and w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C8A", + "and x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4A", + "eor w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCA", + "eor x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C2A", + "orn w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CAA", + "orn x10, x11, x12, LSL 23", + )); + 
insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C0A", + "bic w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C8A", + "bic x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C4A", + "eon w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CCA", + "eon x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2B", + "adds w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAB", + "adds x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C6B", + "subs w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CEB", + "subs x10, x11, x12, LSL 23", + )); + + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MAdd32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110031B", + "madd w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MAdd64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110039B", + "madd x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MSub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190031B", + "msub w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::MSub64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190039B", + "msub x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::SMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: zero_reg(), + }, + "417C439B", + "smulh x1, x2, x3", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp::UMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: zero_reg(), + }, + "417CC39B", + "umulh x1, x2, x3", + )); + + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::RotR32, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(19).unwrap(), + }, + "B44E9513", + "ror w20, w21, #19", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: 
ALUOp::RotR64, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(42).unwrap(), + }, + "B4AAD593", + "ror x20, x21, #42", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(13).unwrap(), + }, + "6A7D0D53", + "lsr w10, w11, #13", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(57).unwrap(), + }, + "6AFD79D3", + "lsr x10, x11, #57", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(7).unwrap(), + }, + "A47C0713", + "asr w4, w5, #7", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(35).unwrap(), + }, + "A4FC6393", + "asr x4, x5, #35", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(24).unwrap(), + }, + "281D0853", + "lsl w8, w9, #24", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }, + "280141D3", + "lsl x8, x9, #63", + )); + + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And32, + rd: writable_xreg(21), + rn: xreg(27), + imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(), + }, + "753B0112", + "and w21, w27, #2147500031", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And64, + rd: writable_xreg(7), + rn: xreg(6), + imml: ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(), + }, + "C7381592", + "and x7, x6, #288221580125796352", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(), + }, + "A1000C32", + "orr w1, w5, #1048576", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "A4C401B2", + "orr x4, x5, #9331882296111890817", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(), + }, + "A1380052", + "eor w1, w5, #32767", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(8), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "0AC501D2", + "eor x10, x8, #9331882296111890817", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit32, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C05A", + "rbit w1, w10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit64, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C0DA", + "rbit x1, x10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz32, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C05A", + "clz w15, w3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz64, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C0DA", + "clz x15, x3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls32, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C05A", + "cls w21, w16", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls64, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C0DA", + "cls x21, x16", + )); + + insns.push(( + Inst::ULoad8 { + 
rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41004038", + "ldurb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)), + srcloc: None, + }, + "41004039", + "ldrb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(5)), + srcloc: None, + }, + "41686538", + "ldrb w1, [x2, x5]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41008038", + "ldursb x1, [x2]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()), + srcloc: None, + }, + "41FC8039", + "ldrsb x1, [x2, #63]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(5)), + srcloc: None, + }, + "4168A538", + "ldrsb x1, [x2, x5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()), + srcloc: None, + }, + "41504078", + "ldurh w1, [x2, #5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()), + srcloc: None, + }, + "41104079", + "ldrh w1, [x2, #8]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I16), + srcloc: None, + }, + "41786378", + "ldrh w1, [x2, x3, LSL #1]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41008078", + "ldursh x1, [x2]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: MemArg::UnsignedOffset( + xreg(20), + UImm12Scaled::maybe_from_i64(24, I16).unwrap(), + ), + srcloc: None, + }, + "9C328079", + "ldrsh x28, [x20, #24]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: MemArg::RegScaled(xreg(20), xreg(20), I16), + srcloc: None, + }, + "9C7AB478", + "ldrsh x28, [x20, x20, LSL #1]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410040B8", + "ldur w1, [x2]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(12), + mem: MemArg::UnsignedOffset( + xreg(0), + UImm12Scaled::maybe_from_i64(204, I32).unwrap(), + ), + srcloc: None, + }, + "0CCC40B9", + "ldr w12, [x0, #204]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(12), I32), + srcloc: None, + }, + "41786CB8", + "ldr w1, [x2, x12, LSL #2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410080B8", + "ldursw x1, [x2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(12), + mem: MemArg::UnsignedOffset( + xreg(1), + UImm12Scaled::maybe_from_i64(16380, I32).unwrap(), + ), + srcloc: None, + }, + "2CFCBFB9", + "ldrsw x12, [x1, #16380]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(5), xreg(1), I32), + srcloc: None, + }, + "A178A1B8", + "ldrsw x1, [x5, x1, LSL #2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410040F8", + "ldur x1, [x2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: 
MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()), + srcloc: None, + }, + "410050F8", + "ldur x1, [x2, #-256]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()), + srcloc: None, + }, + "41F04FF8", + "ldur x1, [x2, #255]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(32760, I64).unwrap(), + ), + srcloc: None, + }, + "41FC7FF9", + "ldr x1, [x2, #32760]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(3)), + srcloc: None, + }, + "416863F8", + "ldr x1, [x2, x3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + srcloc: None, + }, + "417863F8", + "ldr x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW), + srcloc: None, + }, + "41D863F8", + "ldr x1, [x2, w3, SXTW #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::Label(MemLabel::PCRel(64)), + srcloc: None, + }, + "01020058", + "ldr x1, pc+64", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410C41F8", + "ldr x1, [x2, #16]!", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410441F8", + "ldr x1, [x2], #16", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(32768), + srcloc: None, + }, + "0F0090D2EF011D8BE10140F9", + "movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(-32768), + srcloc: None, + }, + "EFFF8F92EF011D8BE10140F9", + "movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(1048576), // 2^20 + srcloc: None, + }, + "0F02A0D2EF011D8BE10140F9", + "movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 + srcloc: None, + }, + "2F0080D20F02A0F2EF011D8BE10140F9", + "movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + )); + + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41000038", + "sturb w1, [x2]", + )); + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(4095, I8).unwrap(), + ), + srcloc: None, + }, + "41FC3F39", + "strb w1, [x2, #4095]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "41000078", + "sturh w1, [x2]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(8190, I16).unwrap(), + ), + srcloc: None, + }, + "41FC3F79", + "strh w1, [x2, #8190]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410000B8", + "stur w1, [x2]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + 
UImm12Scaled::maybe_from_i64(16380, I32).unwrap(), + ), + srcloc: None, + }, + "41FC3FB9", + "str w1, [x2, #16380]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::Unscaled(xreg(2), SImm9::zero()), + srcloc: None, + }, + "410000F8", + "stur x1, [x2]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::UnsignedOffset( + xreg(2), + UImm12Scaled::maybe_from_i64(32760, I64).unwrap(), + ), + srcloc: None, + }, + "41FC3FF9", + "str x1, [x2, #32760]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegReg(xreg(2), xreg(3)), + srcloc: None, + }, + "416823F8", + "str x1, [x2, x3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegScaled(xreg(2), xreg(3), I64), + srcloc: None, + }, + "417823F8", + "str x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW), + srcloc: None, + }, + "415823F8", + "str x1, [x2, w3, UXTW #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410C01F8", + "str x1, [x2, #16]!", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: MemArg::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + srcloc: None, + }, + "410401F8", + "str x1, [x2], #16", + )); + + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + }, + "482500A9", + "stp x8, x9, [x10]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "48A51FA9", + "stp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "48253CA9", + "stp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(21), + rt2: xreg(28), + mem: PairMemArg::SignedOffset( + xreg(1), + SImm7Scaled::maybe_from_i64(-512, I64).unwrap(), + ), + }, + "357020A9", + "stp x21, x28, [x1, #-512]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairMemArg::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "4825BCA9", + "stp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(15), + rt2: xreg(16), + mem: PairMemArg::PostIndexed( + writable_xreg(20), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "8FC29FA8", + "stp x15, x16, [x20], #504", + )); + + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + }, + "482540A9", + "ldp x8, x9, [x10]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "48A55FA9", + "ldp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "48257CA9", + "ldp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::SignedOffset( + xreg(10), + SImm7Scaled::maybe_from_i64(-512, I64).unwrap(), + ), + 
}, + "482560A9", + "ldp x8, x9, [x10, #-512]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairMemArg::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + }, + "4825FCA9", + "ldp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(25), + mem: PairMemArg::PostIndexed( + writable_xreg(12), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + }, + "88E5DFA8", + "ldp x8, x25, [x12], #504", + )); + + insns.push(( + Inst::Mov { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E80309AA", + "mov x8, x9", + )); + insns.push(( + Inst::Mov32 { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E803092A", + "mov w8, w9", + )); + + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E8FF9FD2", + "movz x8, #65535", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBFD2", + "movz x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDFD2", + "movz x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFFD2", + "movz x8, #65535, LSL #48", + )); + + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E8FF9F92", + "movn x8, #65535", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBF92", + "movn x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDF92", + "movn x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFF92", + "movn x8, #65535, LSL #48", + )); + + insns.push(( + Inst::MovK { + rd: writable_xreg(12), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(), + }, + "0C0080F2", + "movk x12, #0", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(19), + imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(), + }, + "1300A0F2", + "movk x19, #0, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(3), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "E3FF9FF2", + "movk x3, #65535", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "E8FFBFF2", + "movk x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "E8FFDFF2", + "movk x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "E8FFFFF2", + "movk x8, #65535, LSL #48", + )); + + insns.push(( + Inst::CSel { + rd: writable_xreg(10), + rn: xreg(12), + rm: xreg(14), + cond: Cond::Hs, + }, + "8A218E9A", + "csel x10, x12, x14, hs", + )); + insns.push(( + Inst::CSet { + rd: writable_xreg(15), + cond: Cond::Ge, + }, + "EFB79F9A", + "cset x15, ge", + )); + insns.push(( + Inst::MovToVec64 { + rd: 
writable_vreg(20), + rn: xreg(21), + }, + "B41E084E", + "mov v20.d[0], x21", + )); + insns.push(( + Inst::MovFromVec64 { + rd: writable_xreg(21), + rn: vreg(20), + }, + "953E084E", + "mov x21, v20.d[0]", + )); + insns.push(( + Inst::MovToNZCV { rn: xreg(13) }, + "0D421BD5", + "msr nzcv, x13", + )); + insns.push(( + Inst::MovFromNZCV { + rd: writable_xreg(27), + }, + "1B423BD5", + "mrs x27, nzcv", + )); + insns.push(( + Inst::CondSet { + rd: writable_xreg(5), + cond: Cond::Hi, + }, + "E5979F9A", + "cset x5, hi", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::UQAddScalar, + }, + "D50EF77E", + "uqadd d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::SQAddScalar, + }, + "D50EF75E", + "sqadd d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::UQSubScalar, + }, + "D52EF77E", + "uqsub d21, d22, d23", + )); + insns.push(( + Inst::VecRRR { + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + alu_op: VecALUOp::SQSubScalar, + }, + "D52EF75E", + "sqsub d21, d22, d23", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 32, + }, + "411C0053", + "uxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 32, + }, + "411C0013", + "sxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 32, + }, + "413C0053", + "uxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 32, + }, + "413C0013", + "sxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 64, + }, + "411C0053", + "uxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 64, + }, + "411C4093", + "sxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 64, + }, + "413C0053", + "uxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 64, + }, + "413C4093", + "sxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 32, + to_bits: 64, + }, + "E103022A", + "mov w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 32, + to_bits: 64, + }, + "417C4093", + "sxtw x1, w2", + )); + + insns.push(( + Inst::Jump { + dest: BranchTarget::ResolvedOffset(64), + }, + "10000014", + "b 64", + )); + + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Zero(xreg(8)), + }, + "080200B4", + "cbz x8, 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::NotZero(xreg(8)), + }, + "080200B5", + "cbnz x8, 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Eq), + }, + "00020054", + "b.eq 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ne), + }, + "01020054", + 
"b.ne 64", + )); + + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Hs), + }, + "02020054", + "b.hs 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Lo), + }, + "03020054", + "b.lo 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Mi), + }, + "04020054", + "b.mi 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Pl), + }, + "05020054", + "b.pl 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Vs), + }, + "06020054", + "b.vs 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Vc), + }, + "07020054", + "b.vc 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Hi), + }, + "08020054", + "b.hi 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ls), + }, + "09020054", + "b.ls 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Ge), + }, + "0A020054", + "b.ge 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Lt), + }, + "0B020054", + "b.lt 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Gt), + }, + "0C020054", + "b.gt 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Le), + }, + "0D020054", + "b.le 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Al), + }, + "0E020054", + "b.al 64", + )); + insns.push(( + Inst::CondBrLowered { + target: BranchTarget::ResolvedOffset(64), + kind: CondBrKind::Cond(Cond::Nv), + }, + "0F020054", + "b.nv 64", + )); + + insns.push(( + Inst::CondBrLoweredCompound { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + kind: CondBrKind::Cond(Cond::Le), + }, + "0D02005420000014", + "b.le 64 ; b 128", + )); + + insns.push(( + Inst::Call { + dest: ExternalName::testcase("test0"), + uses: Set::empty(), + defs: Set::empty(), + loc: SourceLoc::default(), + opcode: Opcode::Call, + }, + "00000094", + "bl 0", + )); + + insns.push(( + Inst::CallInd { + rn: xreg(10), + uses: Set::empty(), + defs: Set::empty(), + loc: SourceLoc::default(), + opcode: Opcode::CallIndirect, + }, + "40013FD6", + "blr x10", + )); + + insns.push(( + Inst::IndirectBr { + rn: xreg(3), + targets: vec![1, 2, 3], + }, + "60001FD6", + "br x3", + )); + + insns.push((Inst::Brk, "000020D4", "brk #0")); + + insns.push(( + Inst::Adr { + rd: writable_xreg(15), + label: MemLabel::PCRel((1 << 20) - 4), + }, + "EFFF7F10", + "adr x15, pc+1048572", + )); + + insns.push(( + Inst::FpuMove64 { + rd: writable_vreg(8), + rn: vreg(4), + }, + "881CA40E", + "mov v8.8b, v4.8b", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3201E", + "fabs s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3601E", + "fabs 
d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43211E", + "fneg s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43611E", + "fneg d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3211E", + "fsqrt s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3611E", + "fsqrt d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt32To64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3221E", + "fcvt d15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt64To32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43621E", + "fcvt s15, d30", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B3F1E", + "fadd s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B7F1E", + "fadd d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B3F1E", + "fsub s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B7F1E", + "fsub d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B3F1E", + "fmul s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B7F1E", + "fmul d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B3F1E", + "fdiv s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B7F1E", + "fdiv d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B3F1E", + "fmax s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B7F1E", + "fmax d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B3F1E", + "fmin s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B7F1E", + "fmin d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF071F1F", + "fmadd s15, s30, s31, s1", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF075F1F", + "fmadd d15, d30, d31, d1", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100391E", + "fcvtzu w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100399E", + "fcvtzu x1, s4", + )); + + insns.push(( + 
Inst::FpuToInt { + op: FpuToIntOp::F32ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100381E", + "fcvtzs w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100389E", + "fcvtzs x1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100791E", + "fcvtzu w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100799E", + "fcvtzu x1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100781E", + "fcvtzs w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100789E", + "fcvtzs x1, d4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100231E", + "ucvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100221E", + "scvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100631E", + "ucvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100621E", + "scvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100239E", + "ucvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100229E", + "scvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100639E", + "ucvtf d1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100629E", + "scvtf d1, x4", + )); + + insns.push(( + Inst::FpuCmp32 { + rn: vreg(23), + rm: vreg(24), + }, + "E022381E", + "fcmp s23, s24", + )); + + insns.push(( + Inst::FpuCmp64 { + rn: vreg(23), + rm: vreg(24), + }, + "E022781E", + "fcmp d23, d24", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + srcloc: None, + }, + "107969BC", + "ldr s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F64), + srcloc: None, + }, + "107969FC", + "ldr d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + srcloc: None, + }, + "1079E93C", + "ldr q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000001C", + "ldr s16, pc+8", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000005C", + "ldr d16, pc+8", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: MemArg::Label(MemLabel::PCRel(8)), + srcloc: None, + }, + "5000009C", + "ldr q16, pc+8", + )); + + insns.push(( + Inst::FpuStore32 { + rd: vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), F32), + srcloc: None, + }, + "107929BC", + "str s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuStore64 { + rd: vreg(16), + mem: 
MemArg::RegScaled(xreg(8), xreg(9), F64), + srcloc: None, + }, + "107929FC", + "str d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuStore128 { + rd: vreg(16), + mem: MemArg::RegScaled(xreg(8), xreg(9), I128), + srcloc: None, + }, + "1079A93C", + "str q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::LoadFpuConst32 { + rd: writable_vreg(16), + const_data: 1.0, + }, + "5000001C020000140000803F", + "ldr s16, pc+8 ; b 8 ; data.f32 1", + )); + + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_vreg(16), + const_data: 1.0, + }, + "5000005C03000014000000000000F03F", + "ldr d16, pc+8 ; b 12 ; data.f64 1", + )); + + insns.push(( + Inst::FpuCSel32 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Hi, + }, + "418C231E", + "fcsel s1, s2, s3, hi", + )); + + insns.push(( + Inst::FpuCSel64 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Eq, + }, + "410C631E", + "fcsel d1, d2, d3, eq", + )); + + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus32, + }, + "1743251E", + "frintm s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus64, + }, + "1743651E", + "frintm d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus32, + }, + "17C3241E", + "frintp s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus64, + }, + "17C3641E", + "frintp d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero32, + }, + "17C3251E", + "frintz s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero64, + }, + "17C3651E", + "frintz d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest32, + }, + "1743241E", + "frintn s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest64, + }, + "1743641E", + "frintn d23, d24", + )); + + let rru = create_reg_universe(); + for (insn, expected_encoding, expected_printing) in insns { + println!( + "ARM64: {:?}, {}, {}", + insn, expected_encoding, expected_printing + ); + + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + // Check the encoding is as expected. 
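+        // Note on the expected-encoding strings above (inferred from the vectors
+        // themselves): each one is the emitted byte sequence rendered as hex, i.e.
+        // the 32-bit instruction word in little-endian byte order. For example,
+        // `brk #0` is the word 0xD4200000 and appears as "000020D4", and `b 64`
+        // (word 0x14000010, imm26 = 16 words = 64 bytes) appears as "10000014".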
+ let text_size = { + let mut code_sec = MachSectionSize::new(0); + insn.emit(&mut code_sec); + code_sec.size() + }; + + let mut sink = test_utils::TestCodeSink::new(); + let mut sections = MachSections::new(); + let code_idx = sections.add_section(0, text_size); + let code_sec = sections.get_section(code_idx); + insn.emit(code_sec); + sections.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } + } + + #[test] + fn test_cond_invert() { + for cond in vec![ + Cond::Eq, + Cond::Ne, + Cond::Hs, + Cond::Lo, + Cond::Mi, + Cond::Pl, + Cond::Vs, + Cond::Vc, + Cond::Hi, + Cond::Ls, + Cond::Ge, + Cond::Lt, + Cond::Gt, + Cond::Le, + Cond::Al, + Cond::Nv, + ] + .into_iter() + { + assert_eq!(cond.invert().invert(), cond); + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/imms.rs b/cranelift/codegen/src/isa/arm64/inst/imms.rs new file mode 100644 index 0000000000..eda68af7b1 --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/imms.rs @@ -0,0 +1,753 @@ +//! ARM64 ISA definitions: immediate constants. + +#![allow(dead_code)] +#![allow(non_snake_case)] + +use crate::ir::types::*; +use crate::ir::Type; +use crate::machinst::*; + +use regalloc::RealRegUniverse; + +use core::convert::TryFrom; +use std::string::String; + +/// A signed, scaled 7-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm7Scaled { + /// The value. + pub value: i16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl SImm7Scaled { + /// Create a SImm7Scaled from a raw offset and the known scale type, if + /// possible. + pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option { + assert!(scale_ty == I64 || scale_ty == I32); + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = scale as i64; + let upper_limit = 63 * scale; + let lower_limit = -(64 * scale); + if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 { + Some(SImm7Scaled { + value: value as i16, + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> SImm7Scaled { + SImm7Scaled { value: 0, scale_ty } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + ((self.value / self.scale_ty.bytes() as i16) as u32) & 0x7f + } +} + +/// a 9-bit signed offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm9 { + /// The value. + pub value: i16, +} + +impl SImm9 { + /// Create a signed 9-bit offset from a full-range value, if possible. + pub fn maybe_from_i64(value: i64) -> Option { + if value >= -256 && value <= 255 { + Some(SImm9 { + value: value as i16, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> SImm9 { + SImm9 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + (self.value as u32) & 0x1ff + } +} + +/// An unsigned, scaled 12-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct UImm12Scaled { + /// The value. + pub value: u16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl UImm12Scaled { + /// Create a UImm12Scaled from a raw offset and the known scale type, if + /// possible. 
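+    // For example (values exercised by the encoding tests): with `scale_ty == I64`
+    // the representable offsets are the multiples of 8 in 0..=32760 (4095 * 8), so
+    // `maybe_from_i64(32760, I64)` succeeds (used for `ldr x1, [x2, #32760]`),
+    // while an unaligned or out-of-range offset returns `None`.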
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option { + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = scale as i64; + let limit = 4095 * scale; + if value >= 0 && value <= limit && (value & (scale - 1)) == 0 { + Some(UImm12Scaled { + value: value as u16, + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> UImm12Scaled { + UImm12Scaled { value: 0, scale_ty } + } + + /// Encoded bits. + pub fn bits(&self) -> u32 { + (self.value as u32 / self.scale_ty.bytes()) & 0xfff + } +} + +/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted +/// left by 0 or 12 places. +#[derive(Clone, Debug)] +pub struct Imm12 { + /// The immediate bits. + pub bits: usize, + /// Whether the immediate bits are shifted left by 12 or not. + pub shift12: bool, +} + +impl Imm12 { + /// Compute a Imm12 from raw bits, if possible. + pub fn maybe_from_u64(val: u64) -> Option { + if val == 0 { + Some(Imm12 { + bits: 0, + shift12: false, + }) + } else if val < 0xfff { + Some(Imm12 { + bits: val as usize, + shift12: false, + }) + } else if val < 0xfff_000 && (val & 0xfff == 0) { + Some(Imm12 { + bits: (val as usize) >> 12, + shift12: true, + }) + } else { + None + } + } + + /// Bits for 2-bit "shift" field in e.g. AddI. + pub fn shift_bits(&self) -> u8 { + if self.shift12 { + 0b01 + } else { + 0b00 + } + } + + /// Bits for 12-bit "imm" field in e.g. AddI. + pub fn imm_bits(&self) -> u16 { + self.bits as u16 + } +} + +/// An immediate for logical instructions. +#[derive(Clone, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub struct ImmLogic { + /// The actual value. + value: u64, + /// `N` flag. + pub N: bool, + /// `S` field: element size and element bits. + pub R: u8, + /// `R` field: rotate amount. + pub S: u8, +} + +impl ImmLogic { + /// Compute an ImmLogic from raw bits, if possible. + pub fn maybe_from_u64(value: u64, ty: Type) -> Option { + // Note: This function is a port of VIXL's Assembler::IsImmLogical. + + if ty != I64 && ty != I32 { + return None; + } + + let original_value = value; + + let value = if ty == I32 { + // To handle 32-bit logical immediates, the very easiest thing is to repeat + // the input value twice to make a 64-bit word. The correct encoding of that + // as a logical immediate will also be the correct encoding of the 32-bit + // value. + + // Avoid making the assumption that the most-significant 32 bits are zero by + // shifting the value left and duplicating it. + let value = value << 32; + value | value >> 32 + } else { + value + }; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 bits + // are set. The pattern is rotated right by R, and repeated across a 32 or + // 64-bit value, depending on destination register width. + // + // Put another way: the basic format of a logical immediate is a single + // contiguous stretch of 1 bits, repeated across the whole word at intervals + // given by a power of 2. 
To identify them quickly, we first locate the + // lowest stretch of 1 bits, then the next 1 bit above that; that combination + // is different for every logical immediate, so it gives us all the + // information we need to identify the only logical immediate that our input + // could be, and then we simply check if that's the value we actually have. + // + // (The rotation parameter does give the possibility of the stretch of 1 bits + // going 'round the end' of the word. To deal with that, we observe that in + // any situation where that happens the bitwise NOT of the value is also a + // valid logical immediate. So we simply invert the input whenever its low bit + // is set, and then we know that the rotated case can't arise.) + let (value, inverted) = if value & 1 == 1 { + (!value, true) + } else { + (value, false) + }; + + if value == 0 { + return None; + } + + // The basic analysis idea: imagine our input word looks like this. + // + // 0011111000111110001111100011111000111110001111100011111000111110 + // c b a + // |<--d-->| + // + // We find the lowest set bit (as an actual power-of-2 value, not its index) + // and call it a. Then we add a to our original number, which wipes out the + // bottommost stretch of set bits and replaces it with a 1 carried into the + // next zero bit. Then we look for the new lowest set bit, which is in + // position b, and subtract it, so now our number is just like the original + // but with the lowest stretch of set bits completely gone. Now we find the + // lowest set bit again, which is position c in the diagram above. Then we'll + // measure the distance d between bit positions a and c (using CLZ), and that + // tells us that the only valid logical immediate that could possibly be equal + // to this number is the one in which a stretch of bits running from a to just + // below b is replicated every d bits. + fn lowest_set_bit(value: u64) -> u64 { + let bit = value.trailing_zeros(); + 1u64.checked_shl(bit).unwrap_or(0) + } + let a = lowest_set_bit(value); + assert_ne!(0, a); + let value_plus_a = value.wrapping_add(a); + let b = lowest_set_bit(value_plus_a); + let value_plus_a_minus_b = value_plus_a - b; + let c = lowest_set_bit(value_plus_a_minus_b); + + let (d, clz_a, out_n, mask) = if c != 0 { + // The general case, in which there is more than one stretch of set bits. + // Compute the repeat distance d, and set up a bitmask covering the basic + // unit of repetition (i.e. a word with the bottom d bits set). Also, in all + // of these cases the N bit of the output will be zero. + let clz_a = a.leading_zeros(); + let clz_c = c.leading_zeros(); + let d = clz_a - clz_c; + let mask = (1 << d) - 1; + (d, clz_a, 0, mask) + } else { + (64, a.leading_zeros(), 1, u64::max_value()) + }; + + // If the repeat period d is not a power of two, it can't be encoded. + if !d.is_power_of_two() { + return None; + } + + if ((b.wrapping_sub(a)) & !mask) != 0 { + // If the bit stretch (b - a) does not fit within the mask derived from the + // repeat period, then fail. + return None; + } + + // The only possible option is b - a repeated every d bits. Now we're going to + // actually construct the valid logical immediate derived from that + // specification, and see if it equals our original input. + // + // To repeat a value every d bits, we multiply it by a number of the form + // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can + // be derived using a table lookup on CLZ(d). 
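+        // Worked example (traced by hand from the steps above): for
+        // value = 0x1111_1111_1111_1111 the lowest set bit a is 1 and b is 2, so
+        // the basic unit b - a = 1 repeats with period d = 4. The table lookup
+        // gives MULTIPLIERS[CLZ(4) - 57] = 0x1111_1111_1111_1111, and
+        // 1 * 0x1111_1111_1111_1111 reproduces the input, so the value is
+        // encodable (the test below expects N = false, S = 56, R = 0 for it).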
+ const MULTIPLIERS: [u64; 6] = [ + 0x0000000000000001, + 0x0000000100000001, + 0x0001000100010001, + 0x0101010101010101, + 0x1111111111111111, + 0x5555555555555555, + ]; + let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize]; + let candidate = b.wrapping_sub(a) * multiplier; + + if value != candidate { + // The candidate pattern doesn't match our input value, so fail. + return None; + } + + // We have a match! This is a valid logical immediate, so now we have to + // construct the bits and pieces of the instruction encoding that generates + // it. + + // Count the set bits in our basic stretch. The special case of clz(0) == -1 + // makes the answer come out right for stretches that reach the very top of + // the word (e.g. numbers like 0xffffc00000000000). + let clz_b = if b == 0 { + u32::max_value() // -1 + } else { + b.leading_zeros() + }; + let s = clz_a.wrapping_sub(clz_b); + + // Decide how many bits to rotate right by, to put the low bit of that basic + // stretch in position a. + let (s, r) = if inverted { + // If we inverted the input right at the start of this function, here's + // where we compensate: the number of set bits becomes the number of clear + // bits, and the rotation count is based on position b rather than position + // a (since b is the location of the 'lowest' 1 bit after inversion). + // Need wrapping for when clz_b is max_value() (for when b == 0). + (d - s, clz_b.wrapping_add(1) & (d - 1)) + } else { + (s, (clz_a + 1) & (d - 1)) + }; + + // Now we're done, except for having to encode the S output in such a way that + // it gives both the number of set bits and the length of the repeated + // segment. The s field is encoded like this: + // + // imms size S + // ssssss 64 UInt(ssssss) + // 0sssss 32 UInt(sssss) + // 10ssss 16 UInt(ssss) + // 110sss 8 UInt(sss) + // 1110ss 4 UInt(ss) + // 11110s 2 UInt(s) + // + // So we 'or' (2 * -d) with our computed s to form imms. + let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f; + debug_assert!(u8::try_from(r).is_ok()); + debug_assert!(u8::try_from(s).is_ok()); + Some(ImmLogic { + value: original_value, + N: out_n != 0, + R: r as u8, + S: s as u8, + }) + } + + pub fn from_raw(value: u64, n: bool, r: u8, s: u8) -> ImmLogic { + ImmLogic { + N: n, + R: r, + S: s, + value, + } + } + + /// Returns bits ready for encoding: (N:1, R:6, S:6) + pub fn enc_bits(&self) -> u16 { + ((self.N as u16) << 12) | ((self.R as u16) << 6) | (self.S as u16) + } + + /// Returns the value that this immediate represents. + pub fn value(&self) -> u64 { + self.value + } + + /// Return an immediate for the bitwise-inverted value. + pub fn invert(&self) -> ImmLogic { + // For every ImmLogical immediate, the inverse can also be encoded. + Self::maybe_from_u64(!self.value, I64).unwrap() + } +} + +/// An immediate for shift instructions. +#[derive(Clone, Debug)] +pub struct ImmShift { + /// 6-bit shift amount. + pub imm: u8, +} + +impl ImmShift { + /// Create an ImmShift from raw bits, if possible. + pub fn maybe_from_u64(val: u64) -> Option { + if val < 64 { + Some(ImmShift { imm: val as u8 }) + } else { + None + } + } + + /// Get the immediate value. + pub fn value(&self) -> u8 { + self.imm + } +} + +/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct MoveWideConst { + /// The value. + pub bits: u16, + /// shifted 16*shift bits to the left. + pub shift: u8, +} + +impl MoveWideConst { + /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible. 
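+    // For example, 0xffff_0000_0000_0000 is representable as bits = 0xffff with
+    // shift = 3 (printed as `movz x8, #65535, LSL #48` in the encoding tests),
+    // while a constant spanning two 16-bit halves, such as 0x1_0001, is not
+    // representable and yields `None`.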
+ pub fn maybe_from_u64(value: u64) -> Option { + let mask0 = 0x0000_0000_0000_ffffu64; + let mask1 = 0x0000_0000_ffff_0000u64; + let mask2 = 0x0000_ffff_0000_0000u64; + let mask3 = 0xffff_0000_0000_0000u64; + + if value == (value & mask0) { + return Some(MoveWideConst { + bits: (value & mask0) as u16, + shift: 0, + }); + } + if value == (value & mask1) { + return Some(MoveWideConst { + bits: ((value >> 16) & mask0) as u16, + shift: 1, + }); + } + if value == (value & mask2) { + return Some(MoveWideConst { + bits: ((value >> 32) & mask0) as u16, + shift: 2, + }); + } + if value == (value & mask3) { + return Some(MoveWideConst { + bits: ((value >> 48) & mask0) as u16, + shift: 3, + }); + } + None + } + + pub fn maybe_with_shift(imm: u16, shift: u8) -> Option { + let shift_enc = shift / 16; + if shift_enc > 3 { + None + } else { + Some(MoveWideConst { + bits: imm, + shift: shift_enc, + }) + } + } + + /// Returns the value that this constant represents. + pub fn value(&self) -> u64 { + (self.bits as u64) << (16 * self.shift) + } +} + +impl ShowWithRRU for Imm12 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let shift = if self.shift12 { 12 } else { 0 }; + let value = self.bits << shift; + format!("#{}", value) + } +} + +impl ShowWithRRU for SImm7Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for SImm9 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for UImm12Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl ShowWithRRU for ImmLogic { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value()) + } +} + +impl ShowWithRRU for ImmShift { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.imm) + } +} + +impl ShowWithRRU for MoveWideConst { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + if self.shift == 0 { + format!("#{}", self.bits) + } else { + format!("#{}, LSL #{}", self.bits, self.shift * 16) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn imm_logical_test() { + assert_eq!(None, ImmLogic::maybe_from_u64(0, I64)); + assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64)); + + assert_eq!( + Some(ImmLogic { + value: 1, + N: true, + R: 0, + S: 0 + }), + ImmLogic::maybe_from_u64(1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 2, + N: true, + R: 63, + S: 0 + }), + ImmLogic::maybe_from_u64(2, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(5, I64)); + + assert_eq!(None, ImmLogic::maybe_from_u64(11, I64)); + + assert_eq!( + Some(ImmLogic { + value: 248, + N: true, + R: 61, + S: 4 + }), + ImmLogic::maybe_from_u64(248, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(249, I64)); + + assert_eq!( + Some(ImmLogic { + value: 1920, + N: true, + R: 57, + S: 3 + }), + ImmLogic::maybe_from_u64(1920, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x7ffe, + N: true, + R: 63, + S: 13 + }), + ImmLogic::maybe_from_u64(0x7ffe, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x30000, + N: true, + R: 48, + S: 1 + }), + ImmLogic::maybe_from_u64(0x30000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000, + N: true, + R: 44, + S: 0 + }), + ImmLogic::maybe_from_u64(0x100000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: u64::max_value() - 1, + N: true, + R: 63, + S: 62 + }), + 
ImmLogic::maybe_from_u64(u64::max_value() - 1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xaaaaaaaaaaaaaaaa, + N: false, + R: 1, + S: 60 + }), + ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x8181818181818181, + N: false, + R: 1, + S: 49 + }), + ImmLogic::maybe_from_u64(0x8181818181818181, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xffc3ffc3ffc3ffc3, + N: false, + R: 10, + S: 43 + }), + ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000001, + N: false, + R: 0, + S: 0 + }), + ImmLogic::maybe_from_u64(0x100000001, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x1111111111111111, + N: false, + R: 0, + S: 56 + }), + ImmLogic::maybe_from_u64(0x1111111111111111, I64) + ); + + for n in 0..2 { + let types = if n == 0 { vec![I64, I32] } else { vec![I64] }; + for s in 0..64 { + for r in 0..64 { + let imm = get_logical_imm(n, s, r); + for &ty in &types { + match ImmLogic::maybe_from_u64(imm, ty) { + Some(ImmLogic { value, .. }) => { + assert_eq!(imm, value); + ImmLogic::maybe_from_u64(!value, ty).unwrap(); + } + None => assert_eq!(0, imm), + }; + } + } + } + } + } + + // Repeat a value that has `width` bits, across a 64-bit value. + fn repeat(value: u64, width: u64) -> u64 { + let mut result = value & ((1 << width) - 1); + let mut i = width; + while i < 64 { + result |= result << i; + i *= 2; + } + result + } + + // Get the logical immediate, from the encoding N/R/S bits. + fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 { + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 64-bit value. + + if n == 1 { + if s == 0x3f { + return 0; + } + let bits = (1u64 << (s + 1)) - 1; + bits.rotate_right(r) + } else { + if (s >> 1) == 0x1f { + return 0; + } + let mut width = 0x20; + while width >= 0x2 { + if (s & width) == 0 { + let mask = width - 1; + if (s & mask) == mask { + return 0; + } + let bits = (1u64 << ((s & mask) + 1)) - 1; + return repeat(bits.rotate_right(r & mask), width.into()); + } + width >>= 1; + } + unreachable!(); + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/mod.rs b/cranelift/codegen/src/isa/arm64/inst/mod.rs new file mode 100644 index 0000000000..ecc948cc70 --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/mod.rs @@ -0,0 +1,2515 @@ +//! This module defines arm64-specific machine instruction types. 
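+//!
+//! The submodules declared below (`regs`, `imms`, `args` and `emit`) are
+//! re-exported from this module: an instruction is built as an `Inst` variant,
+//! pretty-printed with `show_rru`, and encoded with `emit`, which is exactly the
+//! path the per-instruction encoding tests exercise.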
+ +#![allow(non_snake_case)] +#![allow(unused_imports)] +#![allow(non_camel_case_types)] +#![allow(dead_code)] + +use crate::binemit::CodeOffset; +use crate::ir::constant::{ConstantData, ConstantOffset}; +use crate::ir::types::{ + B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, +}; +use crate::ir::{ExternalName, GlobalValue, JumpTable, Opcode, SourceLoc, TrapCode, Type}; +use crate::machinst::*; + +use regalloc::Map as RegallocMap; +use regalloc::{ + RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable, + NUM_REG_CLASSES, +}; +use regalloc::{RegUsageCollector, Set}; + +use alloc::vec::Vec; +use smallvec::{smallvec, SmallVec}; +use std::mem; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; + +//============================================================================= +// Instructions (top level): definition + +/// An ALU operation. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp { + Add32, + Add64, + Sub32, + Sub64, + Orr32, + Orr64, + OrrNot32, + OrrNot64, + And32, + And64, + AndNot32, + AndNot64, + Eor32, + Eor64, + EorNot32, + EorNot64, + AddS32, + AddS64, + SubS32, + SubS64, + MAdd32, // multiply-add + MAdd64, + MSub32, + MSub64, + SMulH, + UMulH, + SDiv64, + UDiv64, + RotR32, + RotR64, + Lsr32, + Lsr64, + Asr32, + Asr64, + Lsl32, + Lsl64, +} + +/// A floating-point unit (FPU) operation with one arg. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp1 { + Abs32, + Abs64, + Neg32, + Neg64, + Sqrt32, + Sqrt64, + Cvt32To64, + Cvt64To32, +} + +/// A floating-point unit (FPU) operation with two args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp2 { + Add32, + Add64, + Sub32, + Sub64, + Mul32, + Mul64, + Div32, + Div64, + Max32, + Max64, + Min32, + Min64, +} + +/// A floating-point unit (FPU) operation with three args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp3 { + MAdd32, + MAdd64, +} + +/// A conversion from an FP to an integer value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuToIntOp { + F32ToU32, + F32ToI32, + F32ToU64, + F32ToI64, + F64ToU32, + F64ToI32, + F64ToU64, + F64ToI64, +} + +/// A conversion from an integer to an FP value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum IntToFpuOp { + U32ToF32, + I32ToF32, + U32ToF64, + I32ToF64, + U64ToF32, + I64ToF32, + U64ToF64, + I64ToF64, +} + +/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to +/// nearest, and for 32- or 64-bit FP values. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuRoundMode { + Minus32, + Minus64, + Plus32, + Plus64, + Zero32, + Zero64, + Nearest32, + Nearest64, +} + +/// A vector ALU operation. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecALUOp { + SQAddScalar, // signed saturating add + UQAddScalar, // unsigned saturating add + SQSubScalar, // signed saturating subtract + UQSubScalar, // unsigned saturating subtract +} + +/// An operation on the bits of a register. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. 
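As an illustration of the "paired with several instruction formats" point above, a single ALU opcode such as `ALUOp::Add64` can appear in any of the register/immediate forms of `Inst` defined below (register names here are arbitrary placeholders, and the assembly shown is roughly what each form corresponds to):

    //   AluRRR       { alu_op: Add64, rd, rn, rm }            // add x0, x1, x2
    //   AluRRImm12   { alu_op: Add64, rd, rn, imm12 }         // add x0, x1, #16
    //   AluRRRShift  { alu_op: Add64, rd, rn, rm, shiftop }   // add x0, x1, x2, LSL #3
    //   AluRRRExtend { alu_op: Add64, rd, rn, rm, extendop }  // add x0, x1, w2, UXTW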
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum BitOp { + RBit32, + RBit64, + Clz32, + Clz64, + Cls32, + Cls64, +} + +impl BitOp { + /// Is the opcode a 32-bit operation. + pub fn is_32_bit(&self) -> bool { + match self { + BitOp::RBit32 => true, + BitOp::Clz32 => true, + BitOp::Cls32 => true, + _ => false, + } + } + + /// Get the assembly mnemonic for this opcode. + pub fn op_str(&self) -> &'static str { + match self { + BitOp::RBit32 | BitOp::RBit64 => "rbit", + BitOp::Clz32 | BitOp::Clz64 => "clz", + BitOp::Cls32 | BitOp::Cls64 => "cls", + } + } +} + +impl From<(Opcode, Type)> for BitOp { + /// Get the BitOp from the IR opcode. + fn from(op_ty: (Opcode, Type)) -> BitOp { + match op_ty { + (Opcode::Bitrev, I32) => BitOp::RBit32, + (Opcode::Bitrev, I64) => BitOp::RBit64, + (Opcode::Clz, I32) => BitOp::Clz32, + (Opcode::Clz, I64) => BitOp::Clz64, + (Opcode::Cls, I32) => BitOp::Cls32, + (Opcode::Cls, I64) => BitOp::Cls64, + _ => unreachable!("Called with non-bit op!"), + } + } +} + +/// Instruction formats. +#[derive(Clone, Debug)] +pub enum Inst { + /// A no-op of zero size. + Nop, + + /// A no-op that is one instruction large. + Nop4, + + /// An ALU operation with two register sources and a register destination. + AluRRR { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + }, + /// An ALU operation with three register sources and a register destination. + AluRRRR { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + ra: Reg, + }, + /// An ALU operation with a register source and an immediate-12 source, and a register + /// destination. + AluRRImm12 { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + imm12: Imm12, + }, + /// An ALU operation with a register source and an immediate-logic source, and a register destination. + AluRRImmLogic { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + imml: ImmLogic, + }, + /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + AluRRImmShift { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + immshift: ImmShift, + }, + /// An ALU operation with two register sources, one of which can be shifted, and a register + /// destination. + AluRRRShift { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + shiftop: ShiftOpAndAmt, + }, + /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and + /// shifted, and a register destination. + AluRRRExtend { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + extendop: ExtendOp, + }, + + /// A bit op instruction with a single register source. + BitRR { + op: BitOp, + rd: Writable, + rn: Reg, + }, + + /// An unsigned (zero-extending) 8-bit load. + ULoad8 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 8-bit load. + SLoad8 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// An unsigned (zero-extending) 16-bit load. + ULoad16 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 16-bit load. + SLoad16 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// An unsigned (zero-extending) 32-bit load. + ULoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A signed (sign-extending) 32-bit load. + SLoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + /// A 64-bit load. + ULoad64 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + + /// An 8-bit store. + Store8 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 16-bit store. 
+ Store16 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 32-bit store. + Store32 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + /// A 64-bit store. + Store64 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + + /// A store of a pair of registers. + StoreP64 { + rt: Reg, + rt2: Reg, + mem: PairMemArg, + }, + /// A load of a pair of registers. + LoadP64 { + rt: Writable, + rt2: Writable, + mem: PairMemArg, + }, + + /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we + /// keep them separate at the `Inst` level for better pretty-printing + /// and faster `is_move()` logic. + Mov { + rd: Writable, + rm: Reg, + }, + + /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is + /// effectively an alias for an unsigned 32-to-64-bit extension. + Mov32 { + rd: Writable, + rm: Reg, + }, + + /// A MOVZ with a 16-bit immediate. + MovZ { + rd: Writable, + imm: MoveWideConst, + }, + + /// A MOVN with a 16-bit immediate. + MovN { + rd: Writable, + imm: MoveWideConst, + }, + + /// A MOVK with a 16-bit immediate. + MovK { + rd: Writable, + imm: MoveWideConst, + }, + + /// A sign- or zero-extend operation. + Extend { + rd: Writable, + rn: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + }, + + /// A conditional-select operation. + CSel { + rd: Writable, + cond: Cond, + rn: Reg, + rm: Reg, + }, + + /// A conditional-set operation. + CSet { + rd: Writable, + cond: Cond, + }, + + /// FPU move. Note that this is distinct from a vector-register + /// move; moving just 64 bits seems to be significantly faster. + FpuMove64 { + rd: Writable, + rn: Reg, + }, + + /// 1-op FPU instruction. + FpuRR { + fpu_op: FPUOp1, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction. + FpuRRR { + fpu_op: FPUOp2, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// 3-op FPU instruction. + FpuRRRR { + fpu_op: FPUOp3, + rd: Writable, + rn: Reg, + rm: Reg, + ra: Reg, + }, + + /// FPU comparison, single-precision (32 bit). + FpuCmp32 { + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, double-precision (64 bit). + FpuCmp64 { + rn: Reg, + rm: Reg, + }, + + /// Floating-point loads and stores. + FpuLoad32 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore32 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + FpuLoad64 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore64 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + FpuLoad128 { + rd: Writable, + mem: MemArg, + srcloc: Option, + }, + FpuStore128 { + rd: Reg, + mem: MemArg, + srcloc: Option, + }, + + LoadFpuConst32 { + rd: Writable, + const_data: f32, + }, + + LoadFpuConst64 { + rd: Writable, + const_data: f64, + }, + + /// Conversions between FP and integer values. + FpuToInt { + op: FpuToIntOp, + rd: Writable, + rn: Reg, + }, + + IntToFpu { + op: IntToFpuOp, + rd: Writable, + rn: Reg, + }, + + // FP conditional select. + FpuCSel32 { + rd: Writable, + rn: Reg, + rm: Reg, + cond: Cond, + }, + FpuCSel64 { + rd: Writable, + rn: Reg, + rm: Reg, + cond: Cond, + }, + + // Round to integer. + FpuRound { + op: FpuRoundMode, + rd: Writable, + rn: Reg, + }, + + /// Move to a vector register from a GPR. + MovToVec64 { + rd: Writable, + rn: Reg, + }, + + /// Move to a GPR from a vector register. + MovFromVec64 { + rd: Writable, + rn: Reg, + }, + + /// A vector ALU op. + VecRRR { + alu_op: VecALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + MovToNZCV { + rn: Reg, + }, + + /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). 
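A brief note on the `Mov`/`Mov32` variants above (illustrative, standard A64 aliasing), explaining why they can share the ORR (`AluRRR`) encoding path while remaining distinct variants for pretty-printing and a cheap `is_move()` check:

    // mov x0, x1   ==   orr x0, xzr, x1
    // mov w0, w1   ==   orr w0, wzr, w1   (writing a W register zeroes bits 63:32)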
+ MovFromNZCV { + rd: Writable, + }, + + /// Set a register to 1 if condition, else 0. + CondSet { + rd: Writable, + cond: Cond, + }, + + /// A machine call instruction. + Call { + dest: ExternalName, + uses: Set, + defs: Set>, + loc: SourceLoc, + opcode: Opcode, + }, + /// A machine indirect-call instruction. + CallInd { + rn: Reg, + uses: Set, + defs: Set>, + loc: SourceLoc, + opcode: Opcode, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// A machine return instruction. + Ret {}, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder {}, + + /// An unconditional branch. + Jump { + dest: BranchTarget, + }, + + /// A conditional branch. + CondBr { + taken: BranchTarget, + not_taken: BranchTarget, + kind: CondBrKind, + }, + + /// Lowered conditional branch: contains the original branch kind (or the + /// inverse), but only one BranchTarget is retained. The other is + /// implicitly the next instruction, given the final basic-block layout. + CondBrLowered { + target: BranchTarget, + kind: CondBrKind, + }, + + /// As for `CondBrLowered`, but represents a condbr/uncond-br sequence (two + /// actual machine instructions). Needed when the final block layout implies + /// that neither arm of a conditional branch targets the fallthrough block. + CondBrLoweredCompound { + taken: BranchTarget, + not_taken: BranchTarget, + kind: CondBrKind, + }, + + /// An indirect branch through a register, augmented with set of all + /// possible successors. + IndirectBr { + rn: Reg, + targets: Vec, + }, + + /// A "break" instruction, used for e.g. traps and debug breakpoints. + Brk, + + /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at + /// runtime. + Udf { + trap_info: (SourceLoc, TrapCode), + }, + + /// Load the address (using a PC-relative offset) of a MemLabel, using the + /// `ADR` instruction. + Adr { + rd: Writable, + label: MemLabel, + }, + + /// Raw 32-bit word, used for inline constants and jump-table entries. + Word4 { + data: u32, + }, + + /// Raw 64-bit word, used for inline constants. + Word8 { + data: u64, + }, + + /// Jump-table sequence, as one compound instruction (see note in lower.rs + /// for rationale). + JTSequence { + targets: Vec, + targets_for_term: Vec, // needed for MachTerminator. + ridx: Reg, + rtmp1: Writable, + rtmp2: Writable, + }, + + /// Load an inline constant. + LoadConst64 { + rd: Writable, + const_data: u64, + }, + + /// Load an inline symbol reference. + LoadExtName { + rd: Writable, + name: ExternalName, + srcloc: SourceLoc, + offset: i64, + }, +} + +fn count_clear_half_words(mut value: u64) -> usize { + let mut count = 0; + for _ in 0..4 { + if value & 0xffff == 0 { + count += 1; + } + value >>= 16; + } + + count +} + +impl Inst { + /// Create a move instruction. + pub fn mov(to_reg: Writable, from_reg: Reg) -> Inst { + assert!(to_reg.to_reg().get_class() == from_reg.get_class()); + if from_reg.get_class() == RegClass::I64 { + Inst::Mov { + rd: to_reg, + rm: from_reg, + } + } else { + Inst::FpuMove64 { + rd: to_reg, + rn: from_reg, + } + } + } + + /// Create a 32-bit move instruction. + pub fn mov32(to_reg: Writable, from_reg: Reg) -> Inst { + Inst::Mov32 { + rd: to_reg, + rm: from_reg, + } + } + + /// Create an instruction that loads a constant, using one of serveral options (MOVZ, MOVN, + /// logical immediate, or constant pool). 
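A rough sketch of the sequences this strategy produces (illustrative only; `writable_xreg` comes from regs.rs, and the third case assumes `ImmLogic` accepts the value as a logical immediate):

    let rd = writable_xreg(1);
    assert_eq!(Inst::load_constant(rd, 0x1_0000).len(), 1);              // one MOVZ (LSL #16)
    assert_eq!(Inst::load_constant(rd, !1u64).len(), 1);                 // one MOVN
    assert_eq!(Inst::load_constant(rd, 0xffff_ffff).len(), 1);           // ORR-immediate with xzr
    assert_eq!(Inst::load_constant(rd, 0x1234_5678_9abc_def0).len(), 4); // MOVZ + 3x MOVK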
+ pub fn load_constant(rd: Writable, value: u64) -> SmallVec<[Inst; 4]> { + if let Some(imm) = MoveWideConst::maybe_from_u64(value) { + // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ + smallvec![Inst::MovZ { rd, imm }] + } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) { + // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN + smallvec![Inst::MovN { rd, imm }] + } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) { + // Weird logical-instruction immediate in ORI using zero register + smallvec![Inst::AluRRImmLogic { + alu_op: ALUOp::Orr64, + rd, + rn: zero_reg(), + imml, + }] + } else { + let mut insts = smallvec![]; + + // If the number of 0xffff half words is greater than the number of 0x0000 half words + // it is more efficient to use `movn` for the first instruction. + let first_is_inverted = count_clear_half_words(!value) > count_clear_half_words(value); + // Either 0xffff or 0x0000 half words can be skipped, depending on the first + // instruction used. + let ignored_halfword = if first_is_inverted { 0xffff } else { 0 }; + let mut first_mov_emitted = false; + + for i in 0..4 { + let imm16 = (value >> (16 * i)) & 0xffff; + if imm16 != ignored_halfword { + if !first_mov_emitted { + first_mov_emitted = true; + if first_is_inverted { + let imm = + MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16) + .unwrap(); + insts.push(Inst::MovN { rd, imm }); + } else { + let imm = + MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); + insts.push(Inst::MovZ { rd, imm }); + } + } else { + let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); + insts.push(Inst::MovK { rd, imm }); + } + } + } + + assert!(first_mov_emitted); + + insts + } + } + + /// Create an instruction that loads a 32-bit floating-point constant. + pub fn load_fp_constant32(rd: Writable, value: f32) -> Inst { + // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits. + Inst::LoadFpuConst32 { + rd, + const_data: value, + } + } + + /// Create an instruction that loads a 64-bit floating-point constant. + pub fn load_fp_constant64(rd: Writable, value: f64) -> Inst { + // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent bits. + Inst::LoadFpuConst64 { + rd, + const_data: value, + } + } +} + +//============================================================================= +// Instructions: get_regs + +fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { + match memarg { + &MemArg::Unscaled(reg, ..) | &MemArg::UnsignedOffset(reg, ..) => { + collector.add_use(reg); + } + &MemArg::RegReg(r1, r2, ..) + | &MemArg::RegScaled(r1, r2, ..) + | &MemArg::RegScaledExtended(r1, r2, ..) => { + collector.add_use(r1); + collector.add_use(r2); + } + &MemArg::Label(..) => {} + &MemArg::PreIndexed(reg, ..) | &MemArg::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + &MemArg::FPOffset(..) => { + collector.add_use(fp_reg()); + } + &MemArg::SPOffset(..) => { + collector.add_use(stack_reg()); + } + } +} + +fn pairmemarg_regs(pairmemarg: &PairMemArg, collector: &mut RegUsageCollector) { + match pairmemarg { + &PairMemArg::SignedOffset(reg, ..) => { + collector.add_use(reg); + } + &PairMemArg::PreIndexed(reg, ..) | &PairMemArg::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + } +} + +fn arm64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + match inst { + &Inst::AluRRR { rd, rn, rm, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::AluRRImm12 { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmLogic { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmShift { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRRShift { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRExtend { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::BitRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::ULoad8 { rd, ref mem, .. } + | &Inst::SLoad8 { rd, ref mem, .. } + | &Inst::ULoad16 { rd, ref mem, .. } + | &Inst::SLoad16 { rd, ref mem, .. } + | &Inst::ULoad32 { rd, ref mem, .. } + | &Inst::SLoad32 { rd, ref mem, .. } + | &Inst::ULoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::Store8 { rd, ref mem, .. } + | &Inst::Store16 { rd, ref mem, .. } + | &Inst::Store32 { rd, ref mem, .. } + | &Inst::Store64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::StoreP64 { + rt, rt2, ref mem, .. + } => { + collector.add_use(rt); + collector.add_use(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::LoadP64 { + rt, rt2, ref mem, .. + } => { + collector.add_def(rt); + collector.add_def(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::Mov { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => { + collector.add_def(rd); + } + &Inst::MovK { rd, .. } => { + collector.add_mod(rd); + } + &Inst::CSel { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CSet { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuMove64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore128 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuToInt { rd, rn, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::IntToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRound { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovToVec64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovFromVec64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::MovToNZCV { rn } => { + collector.add_use(rn); + } + &Inst::MovFromNZCV { rd } => { + collector.add_def(rd); + } + &Inst::CondSet { rd, .. } => { + collector.add_def(rd); + } + &Inst::Extend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::Jump { .. } | &Inst::Ret { .. } | &Inst::EpiloguePlaceholder { .. } => {} + &Inst::Call { + ref uses, ref defs, .. + } => { + collector.add_uses(uses); + collector.add_defs(defs); + } + &Inst::CallInd { + ref uses, + ref defs, + rn, + .. + } => { + collector.add_uses(uses); + collector.add_defs(defs); + collector.add_use(rn); + } + &Inst::CondBr { ref kind, .. } + | &Inst::CondBrLowered { ref kind, .. } + | &Inst::CondBrLoweredCompound { ref kind, .. } => match kind { + CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { + collector.add_use(*rt); + } + CondBrKind::Cond(_) => {} + }, + &Inst::IndirectBr { rn, .. } => { + collector.add_use(rn); + } + &Inst::Nop | Inst::Nop4 => {} + &Inst::Brk => {} + &Inst::Udf { .. } => {} + &Inst::Adr { rd, .. } => { + collector.add_def(rd); + } + &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} + &Inst::JTSequence { + ridx, rtmp1, rtmp2, .. + } => { + collector.add_use(ridx); + collector.add_def(rtmp1); + collector.add_def(rtmp2); + } + &Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => { + collector.add_def(rd); + } + } +} + +//============================================================================= +// Instructions: map_regs + +fn arm64_map_regs( + inst: &mut Inst, + pre_map: &RegallocMap, + post_map: &RegallocMap, +) { + fn map(m: &RegallocMap, r: Reg) -> Reg { + if r.is_virtual() { + m.get(&r.to_virtual_reg()).cloned().unwrap().to_reg() + } else { + r + } + } + + fn map_wr(m: &RegallocMap, r: Writable) -> Writable { + Writable::from_reg(map(m, r.to_reg())) + } + + fn map_mem(u: &RegallocMap, mem: &MemArg) -> MemArg { + // N.B.: we take only the pre-map here, but this is OK because the + // only addressing modes that update registers (pre/post-increment on + // ARM64) both read and write registers, so they are "mods" rather + // than "defs", so must be the same in both the pre- and post-map. 
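For instance (a sketch; the concrete registers are arbitrary):

    // str x7, [x3, #16]    -> the base x3 is only read: a "use", so only the
    //                         pre-map applies.
    // str x7, [x3, #16]!   -> pre-indexed: x3 is read and written, so it is
    //                         recorded as a "mod" in arm64_get_regs above and
    //                         regalloc keeps its pre- and post-assignment equal.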
+ match mem { + &MemArg::Unscaled(reg, simm9) => MemArg::Unscaled(map(u, reg), simm9), + &MemArg::UnsignedOffset(reg, uimm12) => MemArg::UnsignedOffset(map(u, reg), uimm12), + &MemArg::RegReg(r1, r2) => MemArg::RegReg(map(u, r1), map(u, r2)), + &MemArg::RegScaled(r1, r2, ty) => MemArg::RegScaled(map(u, r1), map(u, r2), ty), + &MemArg::RegScaledExtended(r1, r2, ty, op) => { + MemArg::RegScaledExtended(map(u, r1), map(u, r2), ty, op) + } + &MemArg::Label(ref l) => MemArg::Label(l.clone()), + &MemArg::PreIndexed(r, simm9) => MemArg::PreIndexed(map_wr(u, r), simm9), + &MemArg::PostIndexed(r, simm9) => MemArg::PostIndexed(map_wr(u, r), simm9), + &MemArg::FPOffset(off) => MemArg::FPOffset(off), + &MemArg::SPOffset(off) => MemArg::SPOffset(off), + } + } + + fn map_pairmem(u: &RegallocMap, mem: &PairMemArg) -> PairMemArg { + match mem { + &PairMemArg::SignedOffset(reg, simm7) => PairMemArg::SignedOffset(map(u, reg), simm7), + &PairMemArg::PreIndexed(reg, simm7) => PairMemArg::PreIndexed(map_wr(u, reg), simm7), + &PairMemArg::PostIndexed(reg, simm7) => PairMemArg::PostIndexed(map_wr(u, reg), simm7), + } + } + + fn map_br(u: &RegallocMap, br: &CondBrKind) -> CondBrKind { + match br { + &CondBrKind::Zero(reg) => CondBrKind::Zero(map(u, reg)), + &CondBrKind::NotZero(reg) => CondBrKind::NotZero(map(u, reg)), + &CondBrKind::Cond(c) => CondBrKind::Cond(c), + } + } + + let u = pre_map; // For brevity below. + let d = post_map; + + let newval = match inst { + &mut Inst::AluRRR { alu_op, rd, rn, rm } => Inst::AluRRR { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::AluRRRR { + alu_op, + rd, + rn, + rm, + ra, + } => Inst::AluRRRR { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + ra: map(u, ra), + }, + &mut Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => Inst::AluRRImm12 { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + imm12: imm12.clone(), + }, + &mut Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => Inst::AluRRImmLogic { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + imml: imml.clone(), + }, + &mut Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => Inst::AluRRImmShift { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + immshift: immshift.clone(), + }, + &mut Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => Inst::AluRRRShift { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + shiftop: shiftop.clone(), + }, + &mut Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + ref extendop, + } => Inst::AluRRRExtend { + alu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + extendop: extendop.clone(), + }, + &mut Inst::BitRR { op, rd, rn } => Inst::BitRR { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::ULoad8 { + rd, + ref mem, + srcloc, + } => Inst::ULoad8 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad8 { + rd, + ref mem, + srcloc, + } => Inst::SLoad8 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::ULoad16 { + rd, + ref mem, + srcloc, + } => Inst::ULoad16 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad16 { + rd, + ref mem, + srcloc, + } => Inst::SLoad16 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::ULoad32 { + rd, + ref mem, + srcloc, + } => Inst::ULoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::SLoad32 { + rd, + ref mem, + srcloc, + } => Inst::SLoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, 
mem), + srcloc, + }, + &mut Inst::ULoad64 { + rd, + ref mem, + srcloc, + } => Inst::ULoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store8 { + rd, + ref mem, + srcloc, + } => Inst::Store8 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store16 { + rd, + ref mem, + srcloc, + } => Inst::Store16 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store32 { + rd, + ref mem, + srcloc, + } => Inst::Store32 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::Store64 { + rd, + ref mem, + srcloc, + } => Inst::Store64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::StoreP64 { rt, rt2, ref mem } => Inst::StoreP64 { + rt: map(u, rt), + rt2: map(u, rt2), + mem: map_pairmem(u, mem), + }, + &mut Inst::LoadP64 { rt, rt2, ref mem } => Inst::LoadP64 { + rt: map_wr(d, rt), + rt2: map_wr(d, rt2), + mem: map_pairmem(u, mem), + }, + &mut Inst::Mov { rd, rm } => Inst::Mov { + rd: map_wr(d, rd), + rm: map(u, rm), + }, + &mut Inst::Mov32 { rd, rm } => Inst::Mov32 { + rd: map_wr(d, rd), + rm: map(u, rm), + }, + &mut Inst::MovZ { rd, ref imm } => Inst::MovZ { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::MovN { rd, ref imm } => Inst::MovN { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::MovK { rd, ref imm } => Inst::MovK { + rd: map_wr(d, rd), + imm: imm.clone(), + }, + &mut Inst::CSel { rd, rn, rm, cond } => Inst::CSel { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::CSet { rd, cond } => Inst::CSet { + cond, + rd: map_wr(d, rd), + }, + &mut Inst::FpuMove64 { rd, rn } => Inst::FpuMove64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::FpuRR { fpu_op, rd, rn } => Inst::FpuRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::FpuRRR { fpu_op, rd, rn, rm } => Inst::FpuRRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => Inst::FpuRRRR { + fpu_op, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + ra: map(u, ra), + }, + &mut Inst::FpuCmp32 { rn, rm } => Inst::FpuCmp32 { + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuCmp64 { rn, rm } => Inst::FpuCmp64 { + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuLoad32 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad32 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuLoad64 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuLoad128 { + rd, + ref mem, + srcloc, + } => Inst::FpuLoad64 { + rd: map_wr(d, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore32 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore32 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore64 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::FpuStore128 { + rd, + ref mem, + srcloc, + } => Inst::FpuStore64 { + rd: map(u, rd), + mem: map_mem(u, mem), + srcloc, + }, + &mut Inst::LoadFpuConst32 { rd, const_data } => Inst::LoadFpuConst32 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::LoadFpuConst64 { rd, const_data } => Inst::LoadFpuConst64 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::FpuToInt { op, rd, rn } => Inst::FpuToInt { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::IntToFpu { op, rd, rn } => Inst::IntToFpu { + op, + rd: map_wr(d, rd), + 
rn: map(u, rn), + }, + &mut Inst::FpuCSel32 { rd, rn, rm, cond } => Inst::FpuCSel32 { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuCSel64 { rd, rn, rm, cond } => Inst::FpuCSel64 { + cond, + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + }, + &mut Inst::FpuRound { op, rd, rn } => Inst::FpuRound { + op, + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::MovToVec64 { rd, rn } => Inst::MovToVec64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::MovFromVec64 { rd, rn } => Inst::MovFromVec64 { + rd: map_wr(d, rd), + rn: map(u, rn), + }, + &mut Inst::VecRRR { rd, rn, rm, alu_op } => Inst::VecRRR { + rd: map_wr(d, rd), + rn: map(u, rn), + rm: map(u, rm), + alu_op, + }, + &mut Inst::MovToNZCV { rn } => Inst::MovToNZCV { rn: map(u, rn) }, + &mut Inst::MovFromNZCV { rd } => Inst::MovFromNZCV { rd: map_wr(d, rd) }, + &mut Inst::CondSet { rd, cond } => Inst::CondSet { + rd: map_wr(d, rd), + cond, + }, + &mut Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => Inst::Extend { + rd: map_wr(d, rd), + rn: map(u, rn), + signed, + from_bits, + to_bits, + }, + &mut Inst::Jump { dest } => Inst::Jump { dest }, + &mut Inst::Call { + ref uses, + ref defs, + ref dest, + loc, + opcode, + } => { + let uses = uses.map(|r| map(u, *r)); + let defs = defs.map(|r| map_wr(d, *r)); + let dest = dest.clone(); + Inst::Call { + dest, + uses, + defs, + loc, + opcode, + } + } + &mut Inst::Ret {} => Inst::Ret {}, + &mut Inst::EpiloguePlaceholder {} => Inst::EpiloguePlaceholder {}, + &mut Inst::CallInd { + ref uses, + ref defs, + rn, + loc, + opcode, + } => { + let uses = uses.map(|r| map(u, *r)); + let defs = defs.map(|r| map_wr(d, *r)); + Inst::CallInd { + uses, + defs, + rn: map(u, rn), + loc, + opcode, + } + } + &mut Inst::CondBr { + taken, + not_taken, + kind, + } => Inst::CondBr { + taken, + not_taken, + kind: map_br(u, &kind), + }, + &mut Inst::CondBrLowered { target, kind } => Inst::CondBrLowered { + target, + kind: map_br(u, &kind), + }, + &mut Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + } => Inst::CondBrLoweredCompound { + taken, + not_taken, + kind: map_br(u, &kind), + }, + &mut Inst::IndirectBr { rn, ref targets } => Inst::IndirectBr { + rn: map(u, rn), + targets: targets.clone(), + }, + &mut Inst::Nop => Inst::Nop, + &mut Inst::Nop4 => Inst::Nop4, + &mut Inst::Brk => Inst::Brk, + &mut Inst::Udf { trap_info } => Inst::Udf { trap_info }, + &mut Inst::Adr { rd, ref label } => Inst::Adr { + rd: map_wr(d, rd), + label: label.clone(), + }, + &mut Inst::Word4 { data } => Inst::Word4 { data }, + &mut Inst::Word8 { data } => Inst::Word8 { data }, + &mut Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref targets, + ref targets_for_term, + } => Inst::JTSequence { + targets: targets.clone(), + targets_for_term: targets_for_term.clone(), + ridx: map(u, ridx), + rtmp1: map_wr(d, rtmp1), + rtmp2: map_wr(d, rtmp2), + }, + &mut Inst::LoadConst64 { rd, const_data } => Inst::LoadConst64 { + rd: map_wr(d, rd), + const_data, + }, + &mut Inst::LoadExtName { + rd, + ref name, + offset, + srcloc, + } => Inst::LoadExtName { + rd: map_wr(d, rd), + name: name.clone(), + offset, + srcloc, + }, + }; + *inst = newval; +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + fn get_regs(&self, collector: &mut RegUsageCollector) { + arm64_get_regs(self, collector) + } + + fn map_regs( + &mut self, + pre_map: &RegallocMap, + post_map: 
&RegallocMap, + ) { + arm64_map_regs(self, pre_map, post_map); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + &Inst::Mov { rd, rm } => Some((rd, rm)), + &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Inst::EpiloguePlaceholder { .. } = self { + true + } else { + false + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + &Inst::Ret {} | &Inst::EpiloguePlaceholder {} => MachTerminator::Ret, + &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_block_index().unwrap()), + &Inst::CondBr { + taken, not_taken, .. + } => MachTerminator::Cond( + taken.as_block_index().unwrap(), + not_taken.as_block_index().unwrap(), + ), + &Inst::CondBrLowered { .. } => { + // When this is used prior to branch finalization for branches + // within an open-coded sequence, i.e. with ResolvedOffsets, + // do not consider it a terminator. From the point of view of CFG analysis, + // it is part of a black-box single-in single-out region, hence is not + // denoted a terminator. + MachTerminator::None + } + &Inst::CondBrLoweredCompound { .. } => { + panic!("is_term() called after lowering branches"); + } + &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]), + &Inst::JTSequence { + ref targets_for_term, + .. + } => MachTerminator::Indirect(&targets_for_term[..]), + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + assert!(ty.bits() <= 64); // no vector support yet! + Inst::mov(to_reg, from_reg) + } + + fn gen_zero_len_nop() -> Inst { + Inst::Nop + } + + fn gen_nop(preferred_size: usize) -> Inst { + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { + None + } + + fn rc_for_type(ty: Type) -> RegClass { + match ty { + I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => RegClass::I64, + F32 | F64 => RegClass::V128, + I128 | B128 => RegClass::V128, + IFLAGS | FFLAGS => RegClass::I64, + _ => panic!("Unexpected SSA-value type: {}", ty), + } + } + + fn gen_jump(blockindex: BlockIndex) -> Inst { + Inst::Jump { + dest: BranchTarget::Block(blockindex), + } + } + + fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]) { + match self { + &mut Inst::Jump { ref mut dest } => { + dest.map(block_target_map); + } + &mut Inst::CondBr { + ref mut taken, + ref mut not_taken, + .. + } => { + taken.map(block_target_map); + not_taken.map(block_target_map); + } + &mut Inst::CondBrLowered { .. } => { + // See note in `is_term()`: this is used in open-coded sequences + // within blocks and should be left alone. + } + &mut Inst::CondBrLoweredCompound { .. } => { + panic!("with_block_rewrites called after branch lowering!"); + } + _ => {} + } + } + + fn with_fallthrough_block(&mut self, fallthrough: Option) { + match self { + &mut Inst::CondBr { + taken, + not_taken, + kind, + } => { + if taken.as_block_index() == fallthrough + && not_taken.as_block_index() == fallthrough + { + *self = Inst::Nop; + } else if taken.as_block_index() == fallthrough { + *self = Inst::CondBrLowered { + target: not_taken, + kind: kind.invert(), + }; + } else if not_taken.as_block_index() == fallthrough { + *self = Inst::CondBrLowered { + target: taken, + kind, + }; + } else { + // We need a compound sequence (condbr / uncond-br). 
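To make the fallthrough cases concrete (illustrative; `taken`/`not_taken` as in the match arm above):

    // With the final layout placing `fallthrough` immediately after this block:
    //   both targets == fallthrough -> Nop
    //   not_taken == fallthrough    -> CondBrLowered { target: taken, kind }              => b.<cond> taken
    //   taken == fallthrough        -> CondBrLowered { target: not_taken, kind.invert() } => b.<inv> not_taken
    //   neither                     -> the compound form built here                        => b.<cond> taken ; b not_taken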
+ *self = Inst::CondBrLoweredCompound { + taken, + not_taken, + kind, + }; + } + } + &mut Inst::Jump { dest } => { + if dest.as_block_index() == fallthrough { + *self = Inst::Nop; + } + } + _ => {} + } + } + + fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]) { + match self { + &mut Inst::CondBrLowered { ref mut target, .. } => { + target.lower(targets, my_offset); + } + &mut Inst::CondBrLoweredCompound { + ref mut taken, + ref mut not_taken, + .. + } => { + taken.lower(targets, my_offset); + not_taken.lower(targets, my_offset + 4); + } + &mut Inst::Jump { ref mut dest } => { + dest.lower(targets, my_offset); + } + &mut Inst::JTSequence { + targets: ref mut t, .. + } => { + for target in t { + // offset+20: jumptable is 20 bytes into compound sequence. + target.lower(targets, my_offset + 20); + } + } + _ => {} + } + } + + fn reg_universe() -> RealRegUniverse { + create_reg_universe() + } +} + +//============================================================================= +// Pretty-printing of instructions. + +fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(0, mem); + let mut mem_str = mem_insts + .into_iter() + .map(|inst| inst.show_rru(mb_rru)) + .collect::>() + .join(" ; "); + if !mem_str.is_empty() { + mem_str += " ; "; + } + + (mem_str, mem) +} + +impl ShowWithRRU for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn op_is32(alu_op: ALUOp) -> (&'static str, bool) { + match alu_op { + ALUOp::Add32 => ("add", true), + ALUOp::Add64 => ("add", false), + ALUOp::Sub32 => ("sub", true), + ALUOp::Sub64 => ("sub", false), + ALUOp::Orr32 => ("orr", true), + ALUOp::Orr64 => ("orr", false), + ALUOp::And32 => ("and", true), + ALUOp::And64 => ("and", false), + ALUOp::Eor32 => ("eor", true), + ALUOp::Eor64 => ("eor", false), + ALUOp::AddS32 => ("adds", true), + ALUOp::AddS64 => ("adds", false), + ALUOp::SubS32 => ("subs", true), + ALUOp::SubS64 => ("subs", false), + ALUOp::MAdd32 => ("madd", true), + ALUOp::MAdd64 => ("madd", false), + ALUOp::MSub32 => ("msub", true), + ALUOp::MSub64 => ("msub", false), + ALUOp::SMulH => ("smulh", false), + ALUOp::UMulH => ("umulh", false), + ALUOp::SDiv64 => ("sdiv", false), + ALUOp::UDiv64 => ("udiv", false), + ALUOp::AndNot32 => ("bic", true), + ALUOp::AndNot64 => ("bic", false), + ALUOp::OrrNot32 => ("orn", true), + ALUOp::OrrNot64 => ("orn", false), + ALUOp::EorNot32 => ("eon", true), + ALUOp::EorNot64 => ("eon", false), + ALUOp::RotR32 => ("ror", true), + ALUOp::RotR64 => ("ror", false), + ALUOp::Lsr32 => ("lsr", true), + ALUOp::Lsr64 => ("lsr", false), + ALUOp::Asr32 => ("asr", true), + ALUOp::Asr64 => ("asr", false), + ALUOp::Lsl32 => ("lsl", true), + ALUOp::Lsl64 => ("lsl", false), + } + } + + match self { + &Inst::Nop => "nop-zero-len".to_string(), + &Inst::Nop4 => "nop".to_string(), + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::AluRRRR { + alu_op, + rd, + rn, + rm, + ra, + } => { + let (op, is32) = op_is32(alu_op); + let four_args = alu_op != ALUOp::SMulH && alu_op != ALUOp::UMulH; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let ra = show_ireg_sized(ra, mb_rru, is32); + if 
four_args { + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } else { + // smulh and umulh have Ra "hard-wired" to the zero register + // and the canonical assembly form has only three regs. + format!("{} {}, {}, {}", op, rd, rn, rm) + } + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + + if imm12.bits == 0 && alu_op == ALUOp::Add64 { + // special-case MOV (used for moving into SP). + format!("mov {}, {}", rd, rn) + } else { + let imm12 = imm12.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm12) + } + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let imml = imml.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imml) + } + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let immshift = immshift.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, immshift) + } + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let shiftop = shiftop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) + } + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + ref extendop, + } => { + let (op, is32) = op_is32(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + let rm = show_ireg_sized(rm, mb_rru, is32); + let extendop = extendop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) + } + &Inst::BitRR { op, rd, rn } => { + let is32 = op.is_32_bit(); + let op = op.op_str(); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_ireg_sized(rn, mb_rru, is32); + format!("{} {}, {}", op, rd, rn) + } + &Inst::ULoad8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::SLoad32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::ULoad64 { + rd, + ref mem, + srcloc: _srcloc, + .. + } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + + let is_unscaled = match &mem { + &MemArg::Unscaled(..) => true, + _ => false, + }; + let (op, is32) = match (self, is_unscaled) { + (&Inst::ULoad8 { .. }, false) => ("ldrb", true), + (&Inst::ULoad8 { .. }, true) => ("ldurb", true), + (&Inst::SLoad8 { .. }, false) => ("ldrsb", false), + (&Inst::SLoad8 { .. }, true) => ("ldursb", false), + (&Inst::ULoad16 { .. }, false) => ("ldrh", true), + (&Inst::ULoad16 { .. }, true) => ("ldurh", true), + (&Inst::SLoad16 { .. }, false) => ("ldrsh", false), + (&Inst::SLoad16 { .. }, true) => ("ldursh", false), + (&Inst::ULoad32 { .. }, false) => ("ldr", true), + (&Inst::ULoad32 { .. }, true) => ("ldur", true), + (&Inst::SLoad32 { .. }, false) => ("ldrsw", false), + (&Inst::SLoad32 { .. }, true) => ("ldursw", false), + (&Inst::ULoad64 { .. 
}, false) => ("ldr", false), + (&Inst::ULoad64 { .. }, true) => ("ldur", false), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::Store8 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store16 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store32 { + rd, + ref mem, + srcloc: _srcloc, + } + | &Inst::Store64 { + rd, + ref mem, + srcloc: _srcloc, + .. + } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + + let is_unscaled = match &mem { + &MemArg::Unscaled(..) => true, + _ => false, + }; + let (op, is32) = match (self, is_unscaled) { + (&Inst::Store8 { .. }, false) => ("strb", true), + (&Inst::Store8 { .. }, true) => ("sturb", true), + (&Inst::Store16 { .. }, false) => ("strh", true), + (&Inst::Store16 { .. }, true) => ("sturh", true), + (&Inst::Store32 { .. }, false) => ("str", true), + (&Inst::Store32 { .. }, true) => ("stur", true), + (&Inst::Store64 { .. }, false) => ("str", false), + (&Inst::Store64 { .. }, true) => ("stur", false), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd, mb_rru, is32); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::StoreP64 { rt, rt2, ref mem } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("stp {}, {}, {}", rt, rt2, mem) + } + &Inst::LoadP64 { rt, rt2, ref mem } => { + let rt = rt.to_reg().show_rru(mb_rru); + let rt2 = rt2.to_reg().show_rru(mb_rru); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldp {}, {}, {}", rt, rt2, mem) + } + &Inst::Mov { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("mov {}, {}", rd, rm) + } + &Inst::Mov32 { rd, rm } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rm = show_ireg_sized(rm, mb_rru, /* is32 = */ true); + format!("mov {}, {}", rd, rm) + } + &Inst::MovZ { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movz {}, {}", rd, imm) + } + &Inst::MovN { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movn {}, {}", rd, imm) + } + &Inst::MovK { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let imm = imm.show_rru(mb_rru); + format!("movk {}, {}", rd, imm) + } + &Inst::CSel { rd, rn, rm, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("csel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::CSet { rd, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("cset {}, {}", rd, cond) + } + &Inst::FpuMove64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.8b, {}.8b", rd, rn) + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let (op, is32src, is32dst) = match fpu_op { + FPUOp1::Abs32 => ("fabs", true, true), + FPUOp1::Abs64 => ("fabs", false, false), + FPUOp1::Neg32 => ("fneg", true, true), + FPUOp1::Neg64 => ("fneg", false, false), + FPUOp1::Sqrt32 => ("fsqrt", true, true), + FPUOp1::Sqrt64 => ("fsqrt", false, false), + FPUOp1::Cvt32To64 => ("fcvt", true, false), + FPUOp1::Cvt64To32 => ("fcvt", false, true), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dst); + let rn = show_freg_sized(rn, mb_rru, 
is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let (op, is32) = match fpu_op { + FPUOp2::Add32 => ("fadd", true), + FPUOp2::Add64 => ("fadd", false), + FPUOp2::Sub32 => ("fsub", true), + FPUOp2::Sub64 => ("fsub", false), + FPUOp2::Mul32 => ("fmul", true), + FPUOp2::Mul64 => ("fmul", false), + FPUOp2::Div32 => ("fdiv", true), + FPUOp2::Div64 => ("fdiv", false), + FPUOp2::Max32 => ("fmax", true), + FPUOp2::Max64 => ("fmax", false), + FPUOp2::Min32 => ("fmin", true), + FPUOp2::Min64 => ("fmin", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + let rm = show_freg_sized(rm, mb_rru, is32); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let (op, is32) = match fpu_op { + FPUOp3::MAdd32 => ("fmadd", true), + FPUOp3::MAdd64 => ("fmadd", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + let rm = show_freg_sized(rm, mb_rru, is32); + let ra = show_freg_sized(ra, mb_rru, is32); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } + &Inst::FpuCmp32 { rn, rm } => { + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuCmp64 { rn, rm } => { + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("ldr {}, {}", rd, mem) + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd, mb_rru, /* is32 = */ true); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 4); + format!("str {}, {}", rd, mem) + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + let rd = show_freg_sized(rd, mb_rru, /* is32 = */ false); + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("str {}, {}", rd, mem) + } + &Inst::FpuStore128 { rd, ref mem, .. 
} => { + let rd = rd.show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + format!("str {}, {}", rd, mem) + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + format!("ldr {}, pc+8 ; b 8 ; data.f32 {}", rd, const_data) + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, const_data) + } + &Inst::FpuToInt { op, rd, rn } => { + let (op, is32src, is32dest) = match op { + FpuToIntOp::F32ToI32 => ("fcvtzs", true, true), + FpuToIntOp::F32ToU32 => ("fcvtzu", true, true), + FpuToIntOp::F32ToI64 => ("fcvtzs", true, false), + FpuToIntOp::F32ToU64 => ("fcvtzu", true, false), + FpuToIntOp::F64ToI32 => ("fcvtzs", false, true), + FpuToIntOp::F64ToU32 => ("fcvtzu", false, true), + FpuToIntOp::F64ToI64 => ("fcvtzs", false, false), + FpuToIntOp::F64ToU64 => ("fcvtzu", false, false), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, is32dest); + let rn = show_freg_sized(rn, mb_rru, is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::IntToFpu { op, rd, rn } => { + let (op, is32src, is32dest) = match op { + IntToFpuOp::I32ToF32 => ("scvtf", true, true), + IntToFpuOp::U32ToF32 => ("ucvtf", true, true), + IntToFpuOp::I64ToF32 => ("scvtf", false, true), + IntToFpuOp::U64ToF32 => ("ucvtf", false, true), + IntToFpuOp::I32ToF64 => ("scvtf", true, false), + IntToFpuOp::U32ToF64 => ("ucvtf", true, false), + IntToFpuOp::I64ToF64 => ("scvtf", false, false), + IntToFpuOp::U64ToF64 => ("ucvtf", false, false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32dest); + let rn = show_ireg_sized(rn, mb_rru, is32src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ true); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ true); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + let rd = show_freg_sized(rd.to_reg(), mb_rru, /* is32 = */ false); + let rn = show_freg_sized(rn, mb_rru, /* is32 = */ false); + let rm = show_freg_sized(rm, mb_rru, /* is32 = */ false); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuRound { op, rd, rn } => { + let (inst, is32) = match op { + FpuRoundMode::Minus32 => ("frintm", true), + FpuRoundMode::Minus64 => ("frintm", false), + FpuRoundMode::Plus32 => ("frintp", true), + FpuRoundMode::Plus64 => ("frintp", false), + FpuRoundMode::Zero32 => ("frintz", true), + FpuRoundMode::Zero64 => ("frintz", false), + FpuRoundMode::Nearest32 => ("frintn", true), + FpuRoundMode::Nearest64 => ("frintn", false), + }; + let rd = show_freg_sized(rd.to_reg(), mb_rru, is32); + let rn = show_freg_sized(rn, mb_rru, is32); + format!("{} {}, {}", inst, rd, rn) + } + &Inst::MovToVec64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.d[0], {}", rd, rn) + } + &Inst::MovFromVec64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}, {}.d[0]", rd, rn) + } + &Inst::VecRRR { rd, rn, rm, alu_op } => { + let op = match alu_op { + VecALUOp::SQAddScalar => "sqadd", + VecALUOp::UQAddScalar => "uqadd", + VecALUOp::SQSubScalar => "sqsub", + VecALUOp::UQSubScalar => 
"uqsub", + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru); + let rn = show_vreg_scalar(rn, mb_rru); + let rm = show_vreg_scalar(rm, mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::MovToNZCV { rn } => { + let rn = rn.show_rru(mb_rru); + format!("msr nzcv, {}", rn) + } + &Inst::MovFromNZCV { rd } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("mrs {}, nzcv", rd) + } + &Inst::CondSet { rd, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("cset {}, {}", rd, cond) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + // Is the destination a 32-bit register? Corresponds to whether + // extend-to width is <= 32 bits, *unless* we have an unsigned + // 32-to-64-bit extension, which is implemented with a "mov" to a + // 32-bit (W-reg) dest, because this zeroes the top 32 bits. + let dest_is32 = if !signed && from_bits == 32 && to_bits == 64 { + true + } else { + to_bits <= 32 + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); + let rn = show_ireg_sized(rn, mb_rru, from_bits <= 32); + let op = match (signed, from_bits, to_bits) { + (false, 8, 32) => "uxtb", + (true, 8, 32) => "sxtb", + (false, 16, 32) => "uxth", + (true, 16, 32) => "sxth", + (false, 8, 64) => "uxtb", + (true, 8, 64) => "sxtb", + (false, 16, 64) => "uxth", + (true, 16, 64) => "sxth", + (false, 32, 64) => "mov", // special case (see above). + (true, 32, 64) => "sxtw", + _ => panic!("Unsupported Extend case: {:?}", self), + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + let dest_is32 = to_bits <= 32; + let zr = if dest_is32 { "wzr" } else { "xzr" }; + let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_is32); + let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + .. + } if from_bits == 1 && !signed => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, /* is32 = */ true); + let rn = show_ireg_sized(rn, mb_rru, /* is32 = */ true); + format!("and {}, {}, #1", rd, rn) + } + &Inst::Extend { .. } => { + panic!("Unsupported Extend case"); + } + &Inst::Call { dest: _, .. } => format!("bl 0"), + &Inst::CallInd { rn, .. 
} => { + let rn = rn.show_rru(mb_rru); + format!("blr {}", rn) + } + &Inst::Ret {} => "ret".to_string(), + &Inst::EpiloguePlaceholder {} => "epilogue placeholder".to_string(), + &Inst::Jump { ref dest } => { + let dest = dest.show_rru(mb_rru); + format!("b {}", dest) + } + &Inst::CondBr { + ref taken, + ref not_taken, + ref kind, + } => { + let taken = taken.show_rru(mb_rru); + let not_taken = not_taken.show_rru(mb_rru); + match kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::Cond(c) => { + let c = c.show_rru(mb_rru); + format!("b.{} {} ; b {}", c, taken, not_taken) + } + } + } + &Inst::CondBrLowered { + ref target, + ref kind, + } => { + let target = target.show_rru(mb_rru); + match &kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, {}", reg, target) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, {}", reg, target) + } + &CondBrKind::Cond(c) => { + let c = c.show_rru(mb_rru); + format!("b.{} {}", c, target) + } + } + } + &Inst::CondBrLoweredCompound { + ref taken, + ref not_taken, + ref kind, + } => { + let first = Inst::CondBrLowered { + target: taken.clone(), + kind: kind.clone(), + }; + let second = Inst::Jump { + dest: not_taken.clone(), + }; + first.show_rru(mb_rru) + " ; " + &second.show_rru(mb_rru) + } + &Inst::IndirectBr { rn, .. } => { + let rn = rn.show_rru(mb_rru); + format!("br {}", rn) + } + &Inst::Brk => "brk #0".to_string(), + &Inst::Udf { .. } => "udf".to_string(), + &Inst::Adr { rd, ref label } => { + let rd = rd.show_rru(mb_rru); + let label = label.show_rru(mb_rru); + format!("adr {}, {}", rd, label) + } + &Inst::Word4 { data } => format!("data.i32 {}", data), + &Inst::Word8 { data } => format!("data.i64 {}", data), + &Inst::JTSequence { + ref targets, + ridx, + rtmp1, + rtmp2, + .. + } => { + let ridx = ridx.show_rru(mb_rru); + let rtmp1 = rtmp1.show_rru(mb_rru); + let rtmp2 = rtmp2.show_rru(mb_rru); + format!( + concat!( + "adr {}, pc+16 ; ", + "ldrsw {}, [{}, {}, LSL 2] ; ", + "add {}, {}, {} ; ", + "br {} ; ", + "jt_entries {:?}" + ), + rtmp1, rtmp2, rtmp1, ridx, rtmp1, rtmp1, rtmp2, rtmp1, targets + ) + } + &Inst::LoadConst64 { rd, const_data } => { + let rd = rd.show_rru(mb_rru); + format!("ldr {}, 8 ; b 12 ; data {:?}", rd, const_data) + } + &Inst::LoadExtName { + rd, + ref name, + offset, + srcloc: _srcloc, + } => { + let rd = rd.show_rru(mb_rru); + format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) + } + } + } +} diff --git a/cranelift/codegen/src/isa/arm64/inst/regs.rs b/cranelift/codegen/src/isa/arm64/inst/regs.rs new file mode 100644 index 0000000000..31a915410a --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/inst/regs.rs @@ -0,0 +1,273 @@ +//! ARM64 ISA definitions: registers. 
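+//!
+//! A sketch of the register-index layout used below (as implied by
+//! `XREG_INDICES`, `ZERO_REG_INDEX`, `SP_REG_INDEX` and
+//! `create_reg_universe()`): indices 0..=31 name v0..v31, 32..=58 name the
+//! allocatable x-registers, and 59..=64 name the non-allocatable x15 (spill
+//! temp), x18 (reserved), fp (x29), lr (x30), xzr and sp.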
+
+#![allow(dead_code)]
+
+use crate::machinst::*;
+
+use regalloc::{
+    RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, SpillSlot, VirtualReg, Writable,
+    NUM_REG_CLASSES,
+};
+
+use std::string::{String, ToString};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+#[rustfmt::skip]
+const XREG_INDICES: [u8; 31] = [
+    // X0 - X7
+    32, 33, 34, 35, 36, 37, 38, 39,
+    // X8 - X14
+    40, 41, 42, 43, 44, 45, 46,
+    // X15
+    59,
+    // X16, X17
+    47, 48,
+    // X18
+    60,
+    // X19 - X28
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+    // X29
+    61,
+    // X30
+    62,
+];
+
+const ZERO_REG_INDEX: u8 = 63;
+
+const SP_REG_INDEX: u8 = 64;
+
+/// Get a reference to an X-register (integer register).
+pub fn xreg(num: u8) -> Reg {
+    assert!(num < 31);
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ num,
+        /* index = */ XREG_INDICES[num as usize],
+    )
+}
+
+/// Get a writable reference to an X-register.
+pub fn writable_xreg(num: u8) -> Writable<Reg> {
+    Writable::from_reg(xreg(num))
+}
+
+/// Get a reference to a V-register (vector/FP register).
+pub fn vreg(num: u8) -> Reg {
+    assert!(num < 32);
+    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+}
+
+/// Get a writable reference to a V-register.
+pub fn writable_vreg(num: u8) -> Writable<Reg> {
+    Writable::from_reg(vreg(num))
+}
+
+/// Get a reference to the zero-register.
+pub fn zero_reg() -> Reg {
+    // This should be the same as what xreg(31) returns, except that
+    // we use the special index into the register index space.
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ 31,
+        /* index = */ ZERO_REG_INDEX,
+    )
+}
+
+/// Get a writable reference to the zero-register (this discards a result).
+pub fn writable_zero_reg() -> Writable<Reg> {
+    Writable::from_reg(zero_reg())
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+    // XSP (stack) and XZR (zero) are logically different registers which have
+    // the same hardware encoding, and whose meaning, in real arm64
+    // instructions, is context-dependent. For convenience of
+    // universe-construction and for correct printing, we make them be two
+    // different real registers.
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ 31,
+        /* index = */ SP_REG_INDEX,
+    )
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+    Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the link register (x30).
+pub fn link_reg() -> Reg {
+    xreg(30)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_link_reg() -> Writable<Reg> {
+    Writable::from_reg(link_reg())
+}
+
+/// Get a reference to the frame pointer (x29).
+pub fn fp_reg() -> Reg {
+    xreg(29)
+}
+
+/// Get a writable reference to the frame pointer.
+pub fn writable_fp_reg() -> Writable<Reg> {
+    Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the "spill temp" register. This register is used to
+/// compute the address of a spill slot when a direct offset addressing mode from
+/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc
+/// and reserve it for this purpose for simplicity; otherwise we need a
+/// multi-stage analysis where we first determine how many spill slots we have,
+/// then perhaps remove the reg from the pool and recompute regalloc.
+pub fn spilltmp_reg() -> Reg {
+    xreg(15)
+}
+
+/// Get a writable reference to the spilltmp reg.
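+/// (This is `spilltmp_reg()` wrapped in `Writable` so it can be named as an
+/// instruction destination; like the other `writable_*` helpers above, it does
+/// not change which register is used.)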
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg())
+}
+
+/// Create the register universe for ARM64.
+pub fn create_reg_universe() -> RealRegUniverse {
+    let mut regs = vec![];
+    let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs
+    // exclude several registers: x18 (globally reserved for platform-specific
+    // purposes), x29 (frame pointer), x30 (link register), x31 (stack pointer
+    // or zero register, depending on context).
+
+    let v_reg_base = 0u8; // in contiguous real-register index space
+    let v_reg_count = 32;
+    for i in 0u8..v_reg_count {
+        let reg = Reg::new_real(
+            RegClass::V128,
+            /* enc = */ i,
+            /* index = */ v_reg_base + i,
+        )
+        .to_real_reg();
+        let name = format!("v{}", i);
+        regs.push((reg, name));
+    }
+    let v_reg_last = v_reg_base + v_reg_count - 1;
+
+    // Add the X registers. N.B.: the order here must match the order implied
+    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
+
+    let x_reg_base = 32u8; // in contiguous real-register index space
+    let mut x_reg_count = 0;
+    for i in 0u8..32u8 {
+        // See above for excluded registers.
+        if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 {
+            continue;
+        }
+        let reg = Reg::new_real(
+            RegClass::I64,
+            /* enc = */ i,
+            /* index = */ x_reg_base + x_reg_count,
+        )
+        .to_real_reg();
+        let name = format!("x{}", i);
+        regs.push((reg, name));
+        x_reg_count += 1;
+    }
+    let x_reg_last = x_reg_base + x_reg_count - 1;
+
+    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+        first: x_reg_base as usize,
+        last: x_reg_last as usize,
+        suggested_scratch: Some(XREG_INDICES[13] as usize),
+    });
+    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+        first: v_reg_base as usize,
+        last: v_reg_last as usize,
+        suggested_scratch: Some(/* V31: */ 31),
+    });
+
+    // Other regs, not available to the allocator.
+    let allocable = regs.len();
+    regs.push((xreg(15).to_real_reg(), "x15".to_string()));
+    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
+    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+    regs.push((link_reg().to_real_reg(), "lr".to_string()));
+    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
+    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
+    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
+    // to 65, which is potentially inconvenient from a compiler performance
+    // standpoint. We could possibly drop back to 64 by "losing" a vector
+    // register in future.
+
+    // Assert sanity: the indices in the register structs must match their
+    // actual indices in the array.
+    for (i, reg) in regs.iter().enumerate() {
+        assert_eq!(i, reg.0.get_index());
+    }
+
+    RealRegUniverse {
+        regs,
+        allocable,
+        allocable_by_class,
+    }
+}
+
+/// If |ireg| denotes an I64-classed reg, make a best-effort attempt to show
+/// its name at the 32-bit size.
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String {
+    let mut s = reg.show_rru(mb_rru);
+    if reg.get_class() != RegClass::I64 || !is32 {
+        // We can't do any better.
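+        // (That is, a non-integer register, or an integer register used in a
+        // 64-bit role, keeps whatever name `show_rru` produced.)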
+ return s; + } + + if reg.is_real() { + // Change (eg) "x42" into "w42" as appropriate + if reg.get_class() == RegClass::I64 && is32 && s.starts_with("x") { + s = "w".to_string() + &s[1..]; + } + } else { + // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role + if reg.get_class() == RegClass::I64 && is32 { + s = s + &"w"; + } + } + s +} + +/// Show a vector register when its use as a 32-bit or 64-bit float is known. +pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, is32: bool) -> String { + let s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::V128 { + return s; + } + let prefix = if is32 { "s" } else { "d" }; + prefix.to_string() + &s[1..] +} + +/// Show a vector register used in a scalar context. +pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String { + let mut s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::V128 { + // We can't do any better. + return s; + } + + if reg.is_real() { + // Change (eg) "v0" into "d0". + if reg.get_class() == RegClass::V128 && s.starts_with("v") { + s = "d".to_string() + &s[1..]; + } + } else { + // Add a "d" suffix to RegClass::V128 vregs. + if reg.get_class() == RegClass::V128 { + s = s + &"d"; + } + } + s +} diff --git a/cranelift/codegen/src/isa/arm64/mod.rs b/cranelift/codegen/src/isa/arm64/mod.rs index 2bd6dce476..b6a28a5dbd 100644 --- a/cranelift/codegen/src/isa/arm64/mod.rs +++ b/cranelift/codegen/src/isa/arm64/mod.rs @@ -1 +1 @@ -// Empty. +mod inst;