Merge pull request #2061 from cfallin/aarch64-amode
Aarch64 codegen quality: support more general add+extend address computations.
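For example (taken from the filetests added below), a 32-bit index that is sign-extended and then added to a 64-bit base now folds into a single register-extended load:

    function %f5(i64, i32) -> i32 {
    block0(v0: i64, v1: i32):
      v2 = sextend.i64 v1
      v3 = iadd.i64 v0, v2
      v4 = load.i32 v3
      return v4
    }

    ; the load lowers to a single extended-register addressing mode:
    ; ldr w0, [x0, w1, SXTW]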
@@ -2,9 +2,8 @@
 //!
 //! TODO: opportunities for better code generation:
 //!
-//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns; recognize
-//!   and incorporate sign/zero extension on indices. Recognize pre/post-index
-//!   opportunities.
+//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
+//!   pre/post-index opportunities.
 //!
 //! - Floating-point immediates (FIMM instruction).
 
@@ -21,8 +20,9 @@ use crate::isa::aarch64::AArch64Backend;
 
 use super::lower_inst;
 
-use log::debug;
+use log::{debug, trace};
 use regalloc::{Reg, RegClass, Writable};
+use smallvec::SmallVec;
 
 //============================================================================
 // Result enum types.
@@ -573,105 +573,251 @@ pub(crate) fn alu_inst_immshift(
 // Lowering: addressing mode support. Takes instruction directly, rather
 // than an `InsnInput`, to do more introspection.
 
+/// 32-bit addends that make up an address: an input, and an extension mode on that
+/// input.
+type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
+/// 64-bit addends that make up an address: just an input.
+type AddressAddend64List = SmallVec<[Reg; 4]>;
+
+/// Collect all addends that feed into an address computation, with extend-modes
+/// on each. Note that a load/store may have multiple address components (and
+/// the CLIF semantics are that these components are added to form the final
+/// address), but sometimes the CLIF that we receive still has arguments that
+/// refer to `iadd` instructions. We also want to handle uextend/sextend below
+/// the add(s).
+///
+/// We match any 64-bit add (and descend into its inputs), and we match any
+/// 32-to-64-bit sign or zero extension. The returned addend-list will use
+/// NarrowValueMode values to indicate how to extend each input:
+///
+/// - NarrowValueMode::None: the associated input is 64 bits wide; no extend.
+/// - NarrowValueMode::SignExtend64: the associated input is 32 bits wide;
+///   do a sign-extension.
+/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
+///   do a zero-extension.
+///
+/// We do not descend further into the inputs of extensions, because supporting
+/// (e.g.) a 32-bit add that is later extended would require additional masking
+/// of high-order bits, which is too complex. So, in essence, we descend any
+/// number of adds from the roots, collecting all 64-bit address addends; then
+/// possibly support extensions at these leaves.
+fn collect_address_addends<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    roots: &[InsnInput],
+) -> (AddressAddend64List, AddressAddend32List, i64) {
+    let mut result32: AddressAddend32List = SmallVec::new();
+    let mut result64: AddressAddend64List = SmallVec::new();
+    let mut offset: i64 = 0;
+
+    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();
+
+    while let Some(input) = workqueue.pop() {
+        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
+        if let Some((op, insn)) = maybe_input_insn_multi(
+            ctx,
+            input,
+            &[
+                Opcode::Uextend,
+                Opcode::Sextend,
+                Opcode::Iadd,
+                Opcode::Iconst,
+            ],
+        ) {
+            match op {
+                Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
+                    let extendop = if op == Opcode::Uextend {
+                        ExtendOp::UXTW
+                    } else {
+                        ExtendOp::SXTW
+                    };
+                    let extendee_input = InsnInput { insn, input: 0 };
+                    let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
+                    result32.push((reg, extendop));
+                }
+                Opcode::Uextend | Opcode::Sextend => {
+                    let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+                    result64.push(reg);
+                }
+                Opcode::Iadd => {
+                    for input in 0..ctx.num_inputs(insn) {
+                        let addend = InsnInput { insn, input };
+                        workqueue.push(addend);
+                    }
+                }
+                Opcode::Iconst => {
+                    let value: i64 = ctx.get_constant(insn).unwrap() as i64;
+                    offset += value;
+                }
+                _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
+            }
+        } else {
+            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
+            result64.push(reg);
+        }
+    }
+
+    (result64, result32, offset)
+}
+
 /// Lower the address of a load or store.
 pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     elem_ty: Type,
-    addends: &[InsnInput],
+    roots: &[InsnInput],
     offset: i32,
 ) -> MemArg {
     // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
     // mul instructions (Load/StoreComplex don't include scale factors).
 
-    // Handle one reg and offset.
-    if addends.len() == 1 {
-        let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
-        return MemArg::RegOffset(reg, offset as i64, elem_ty);
-    }
-
-    // Handle two regs and a zero offset with built-in extend, if possible.
-    if addends.len() == 2 && offset == 0 {
-        // r1, r2 (to be extended), r2_bits, is_signed
-        let mut parts: Option<(Reg, Reg, usize, bool)> = None;
-        // Handle extension of either first or second addend.
-        for i in 0..2 {
-            if let Some((op, ext_insn)) =
-                maybe_input_insn_multi(ctx, addends[i], &[Opcode::Uextend, Opcode::Sextend])
-            {
-                // Non-extended addend.
-                let r1 = put_input_in_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
-                // Extended addend.
-                let r2 = put_input_in_reg(
-                    ctx,
-                    InsnInput {
-                        insn: ext_insn,
-                        input: 0,
-                    },
-                    NarrowValueMode::None,
-                );
-                let r2_bits = ty_bits(ctx.input_ty(ext_insn, 0));
-                parts = Some((
-                    r1,
-                    r2,
-                    r2_bits,
-                    /* is_signed = */ op == Opcode::Sextend,
-                ));
-                break;
-            }
-        }
-
-        if let Some((r1, r2, r2_bits, is_signed)) = parts {
-            match (r2_bits, is_signed) {
-                (32, false) => {
-                    return MemArg::RegExtended(r1, r2, ExtendOp::UXTW);
-                }
-                (32, true) => {
-                    return MemArg::RegExtended(r1, r2, ExtendOp::SXTW);
-                }
-                _ => {}
-            }
-        }
-    }
-
-    // Handle two regs and a zero offset in the general case, if possible.
-    if addends.len() == 2 && offset == 0 {
-        let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
-        let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
-        return MemArg::reg_plus_reg(ra, rb);
-    }
-
-    // Otherwise, generate add instructions.
-    let addr = ctx.alloc_tmp(RegClass::I64, I64);
-
-    // Get the const into a reg.
-    lower_constant_u64(ctx, addr.clone(), offset as u64);
-
-    // Add each addend to the address.
-    for addend in addends {
-        let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
-        // In an addition, the stack register is the zero register, so divert it to another
-        // register just before doing the actual add.
-        let reg = if reg == stack_reg() {
-            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
-            ctx.emit(Inst::Mov {
-                rd: tmp,
-                rm: stack_reg(),
-            });
-            tmp.to_reg()
-        } else {
-            reg
-        };
-
-        ctx.emit(Inst::AluRRR {
-            alu_op: ALUOp::Add64,
-            rd: addr.clone(),
-            rn: addr.to_reg(),
-            rm: reg.clone(),
-        });
-    }
-
-    MemArg::reg(addr.to_reg())
+    // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
+    // extends and addition ops. We update these as we consume address
+    // components, so they represent the remaining addends not yet handled.
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let mut offset = args_offset + (offset as i64);
+
+    trace!(
+        "lower_address: addends64 {:?}, addends32 {:?}, offset {}",
+        addends64,
+        addends32,
+        offset
+    );
+
+    // First, decide what the `MemArg` will be. Take one extendee and one 64-bit
+    // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
+    // or some other combination as appropriate.
+    let memarg = if addends64.len() > 0 {
+        if addends32.len() > 0 {
+            let (reg32, extendop) = addends32.pop().unwrap();
+            let reg64 = addends64.pop().unwrap();
+            MemArg::RegExtended(reg64, reg32, extendop)
+        } else if offset > 0 && offset < 0x1000 {
+            let reg64 = addends64.pop().unwrap();
+            let off = offset;
+            offset = 0;
+            MemArg::RegOffset(reg64, off, elem_ty)
+        } else if addends64.len() >= 2 {
+            let reg1 = addends64.pop().unwrap();
+            let reg2 = addends64.pop().unwrap();
+            MemArg::RegReg(reg1, reg2)
+        } else {
+            let reg1 = addends64.pop().unwrap();
+            MemArg::reg(reg1)
+        }
+    } else
+    /* addends64.len() == 0 */
+    {
+        if addends32.len() > 0 {
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+            let (reg1, extendop) = addends32.pop().unwrap();
+            let signed = match extendop {
+                ExtendOp::SXTW => true,
+                ExtendOp::UXTW => false,
+                _ => unreachable!(),
+            };
+            ctx.emit(Inst::Extend {
+                rd: tmp,
+                rn: reg1,
+                signed,
+                from_bits: 32,
+                to_bits: 64,
+            });
+            if let Some((reg2, extendop)) = addends32.pop() {
+                MemArg::RegExtended(tmp.to_reg(), reg2, extendop)
+            } else {
+                MemArg::reg(tmp.to_reg())
+            }
+        } else
+        /* addends32.len() == 0 */
+        {
+            let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
+            lower_constant_u64(ctx, off_reg, offset as u64);
+            offset = 0;
+            MemArg::reg(off_reg.to_reg())
+        }
+    };
+
+    // At this point, if we have any remaining components, we need to allocate a
+    // temp, replace one of the registers in the MemArg with the temp, and emit
+    // instructions to add together the remaining components. Return immediately
+    // if this is *not* the case.
+    if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
+        return memarg;
+    }
+
+    // Allocate the temp and shoehorn it into the MemArg.
+    let addr = ctx.alloc_tmp(RegClass::I64, I64);
+    let (reg, memarg) = match memarg {
+        MemArg::RegExtended(r1, r2, extendop) => {
+            (r1, MemArg::RegExtended(addr.to_reg(), r2, extendop))
+        }
+        MemArg::RegOffset(r, off, ty) => (r, MemArg::RegOffset(addr.to_reg(), off, ty)),
+        MemArg::RegReg(r1, r2) => (r2, MemArg::RegReg(addr.to_reg(), r1)),
+        MemArg::UnsignedOffset(r, imm) => (r, MemArg::UnsignedOffset(addr.to_reg(), imm)),
+        _ => unreachable!(),
+    };
+
+    // If there is any offset, load that first into `addr`, and add the `reg`
+    // that we kicked out of the `MemArg`; otherwise, start with that reg.
+    if offset != 0 {
+        // If we can fit offset or -offset in an imm12, use an add-imm
+        // to combine the reg and offset. Otherwise, load value first then add.
+        if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
+            ctx.emit(Inst::AluRRImm12 {
+                alu_op: ALUOp::Add64,
+                rd: addr,
+                rn: reg,
+                imm12,
+            });
+        } else if let Some(imm12) = Imm12::maybe_from_u64(offset.wrapping_neg() as u64) {
+            ctx.emit(Inst::AluRRImm12 {
+                alu_op: ALUOp::Sub64,
+                rd: addr,
+                rn: reg,
+                imm12,
+            });
+        } else {
+            lower_constant_u64(ctx, addr, offset as u64);
+            ctx.emit(Inst::AluRRR {
+                alu_op: ALUOp::Add64,
+                rd: addr,
+                rn: addr.to_reg(),
+                rm: reg,
+            });
+        }
+    } else {
+        ctx.emit(Inst::gen_move(addr, reg, I64));
+    }
+
+    // Now handle reg64 and reg32-extended components.
+    for reg in addends64 {
+        // If the register is the stack reg, we must move it to another reg
+        // before adding it.
+        let reg = if reg == stack_reg() {
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+            ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
+            tmp.to_reg()
+        } else {
+            reg
+        };
+
+        ctx.emit(Inst::AluRRR {
+            alu_op: ALUOp::Add64,
+            rd: addr,
+            rn: addr.to_reg(),
+            rm: reg,
+        });
+    }
+    for (reg, extendop) in addends32 {
+        assert!(reg != stack_reg());
+        ctx.emit(Inst::AluRRRExtend {
+            alu_op: ALUOp::Add64,
+            rd: addr,
+            rn: addr.to_reg(),
+            rm: reg,
+            extendop,
+        });
+    }
+
+    memarg
 }
 
 pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
@@ -15,7 +15,7 @@ block0(v0: i64, v1: i32):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %f1(i64, i32) -> i32 {
+function %f2(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
   v2 = uextend.i64 v1
   v3 = load_complex.i32 v2+v0
@@ -29,7 +29,7 @@ block0(v0: i64, v1: i32):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %f1(i64, i32) -> i32 {
+function %f3(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
   v2 = sextend.i64 v1
   v3 = load_complex.i32 v0+v2
@@ -43,7 +43,7 @@ block0(v0: i64, v1: i32):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %f1(i64, i32) -> i32 {
+function %f4(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
   v2 = sextend.i64 v1
   v3 = load_complex.i32 v2+v0
@@ -56,3 +56,216 @@ block0(v0: i64, v1: i32):
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f5(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = sextend.i64 v1
+  v3 = iadd.i64 v0, v2
+  v4 = load.i32 v3
+  return v4
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f6(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = sextend.i64 v1
+  v3 = iadd.i64 v2, v0
+  v4 = load.i32 v3
+  return v4
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f7(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = uextend.i64 v0
+  v3 = uextend.i64 v1
+  v4 = iadd.i64 v2, v3
+  v5 = load.i32 v4
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov w0, w0
+; nextln: ldr w0, [x0, w1, UXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f8(i64, i32) -> i32 {
+block0(v0: i64, v1: i32):
+  v2 = sextend.i64 v1
+  v3 = iconst.i64 32
+  v4 = iadd.i64 v2, v3
+  v5 = iadd.i64 v4, v0
+  v6 = iadd.i64 v5, v5
+  v7 = load.i32 v6+4
+  return v7
+}
+
+; v6+4 = 2*v5 = 2*v4 + 2*v0 + 4 = 2*v2 + 2*v3 + 2*v0 + 4
+;      = 2*sextend($x1) + 2*$x0 + 68
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: add x2, x0, #68
+; nextln: add x0, x2, x0
+; nextln: add x0, x0, x1, SXTW
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f9(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i64 48
+  v4 = iadd.i64 v0, v1
+  v5 = iadd.i64 v4, v2
+  v6 = iadd.i64 v5, v3
+  v7 = load.i32 v6
+  return v7
+}
+
+; v6 = $x0 + $x1 + $x2 + 48
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: add x0, x0, x2
+; nextln: add x0, x0, x1
+; nextln: ldur w0, [x0, #48]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f10(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i64 4100
+  v4 = iadd.i64 v0, v1
+  v5 = iadd.i64 v4, v2
+  v6 = iadd.i64 v5, v3
+  v7 = load.i32 v6
+  return v7
+}
+
+; v6 = $x0 + $x1 + $x2 + 4100
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x3, #4100
+; nextln: add x1, x3, x1
+; nextln: add x1, x1, x2
+; nextln: ldr w0, [x1, x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f10() -> i32 {
+block0:
+  v1 = iconst.i64 1234
+  v2 = load.i32 v1
+  return v2
+}
+
+; v6 = $x0 + $x1 + $x2 + 48
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #1234
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f11(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i64 8388608 ; Imm12: 0x800 << 12
+  v2 = iadd.i64 v0, v1
+  v3 = load.i32 v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: add x0, x0, #8388608
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f12(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i64 -4
+  v2 = iadd.i64 v0, v1
+  v3 = load.i32 v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sub x0, x0, #4
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f13(i64) -> i32 {
+block0(v0: i64):
+  v1 = iconst.i64 1000000000
+  v2 = iadd.i64 v0, v1
+  v3 = load.i32 v2
+  return v3
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x1, #51712
+; nextln: movk x1, #15258, LSL #16
+; nextln: add x0, x1, x0
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f14(i32) -> i32 {
+block0(v0: i32):
+  v1 = sextend.i64 v0
+  v2 = load.i32 v1
+  return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x0, w0
+; nextln: ldr w0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f15(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = sextend.i64 v0
+  v3 = sextend.i64 v1
+  v4 = iadd.i64 v2, v3
+  v5 = load.i32 v4
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x0, w0
+; nextln: ldr w0, [x0, w1, SXTW]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
@@ -15,7 +15,7 @@ block0(v0: i64, v1: i32):
 ; check: Block 0:
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldur w2, [x0]
+; nextln: ldr w2, [x0]
 ; nextln: add w2, w2, #0
 ; nextln: subs wzr, w1, w2
 ; nextln: b.ls label1 ; b label2
@@ -92,7 +92,7 @@ block3(v7: r64, v8: r64):
 ; nextln: ldur x19, [sp, #32]
 ; nextln: ldur x20, [sp, #40]
 ; nextln: add x1, sp, #16
-; nextln: stur x19, [x1]
+; nextln: str x19, [x1]
 ; nextln: and w0, w0, #1
 ; nextln: cbz x0, label1 ; b label3
 ; check: Block 1:
@@ -108,7 +108,7 @@ block3(v7: r64, v8: r64):
 ; nextln: b label5
 ; check: Block 5:
 ; check: add x1, sp, #16
-; nextln: ldur x1, [x1]
+; nextln: ldr x1, [x1]
 ; nextln: mov x2, x1
 ; nextln: mov x1, x19
 ; nextln: ldp x19, x20, [sp], #16
@@ -51,7 +51,7 @@ block0:
 ; nextln: mov fp, sp
 ; nextln: sub sp, sp, #16
 ; nextln: mov x0, sp
-; nextln: ldur x0, [x0]
+; nextln: ldr x0, [x0]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -71,7 +71,7 @@ block0:
 ; nextln: ldr x16, 8 ; b 12 ; data 100016
 ; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov x0, sp
-; nextln: ldur x0, [x0]
+; nextln: ldr x0, [x0]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -89,7 +89,7 @@ block0(v0: i64):
 ; nextln: mov fp, sp
 ; nextln: sub sp, sp, #16
 ; nextln: mov x1, sp
-; nextln: stur x0, [x1]
+; nextln: str x0, [x1]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -109,7 +109,7 @@ block0(v0: i64):
 ; nextln: ldr x16, 8 ; b 12 ; data 100016
 ; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov x1, sp
-; nextln: stur x0, [x1]
+; nextln: str x0, [x1]
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret