diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 88aa60f8af..131bd6ee42 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -497,6 +497,16 @@ impl ABIBody for AArch64ABIBody { store_stack(fp_off, from_reg, ty) } + fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Inst { + // Offset from beginning of stackslot area, which is at FP - stackslots_size. + let stack_off = self.stackslots[slot.as_u32() as usize] as i64; + let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); + Inst::LoadAddr { + rd: into_reg, + mem: MemArg::FPOffset(fp_off), + } + } + // Load from a spillslot. fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { // Note that when spills/fills are generated, we don't yet know how many diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index f01746543c..9d9d98d7c8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -6,11 +6,10 @@ use crate::ir::types::*; use crate::ir::TrapCode; use crate::isa::aarch64::inst::*; -use core::convert::TryFrom; - use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; +use core::convert::TryFrom; /// Memory label/reference finalization: convert a MemLabel to a PC-relative /// offset, possibly emitting relocation(s) as necessary. @@ -1275,6 +1274,39 @@ impl MachInstEmit for Inst { sink.add_reloc(srcloc, Reloc::Abs8, name, offset); sink.put8(0); } + &Inst::LoadAddr { rd, ref mem } => match *mem { + MemArg::FPOffset(fp_off) => { + let alu_op = if fp_off < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) + { + let inst = Inst::AluRRImm12 { + alu_op, + rd, + imm12, + rn: fp_reg(), + }; + inst.emit(sink); + } else { + let const_insts = + Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); + for inst in const_insts { + inst.emit(sink); + } + let inst = Inst::AluRRR { + alu_op, + rd, + rn: fp_reg(), + rm: rd.to_reg(), + }; + inst.emit(sink); + } + } + _ => unimplemented!("{:?}", mem), + }, } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ee55f3394f..90fce9c5fa 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -13,6 +13,7 @@ use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, W use regalloc::{RegUsageCollector, Set}; use alloc::vec::Vec; +use core::convert::TryFrom; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -707,6 +708,12 @@ pub enum Inst { srcloc: SourceLoc, offset: i64, }, + + /// Load address referenced by `mem` into `rd`. + LoadAddr { + rd: Writable, + mem: MemArg, + }, } fn count_zero_half_words(mut value: u64) -> usize { @@ -1089,6 +1096,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => { collector.add_def(rd); } + &Inst::LoadAddr { rd, mem: _ } => { + collector.add_def(rd); + } } } @@ -1643,6 +1653,13 @@ fn aarch64_map_regs( &mut Inst::LoadExtName { ref mut rd, .. } => { map_wr(d, rd); } + &mut Inst::LoadAddr { + ref mut rd, + ref mut mem, + } => { + map_wr(d, rd); + map_mem(u, mem); + } } } @@ -2536,6 +2553,42 @@ impl ShowWithRRU for Inst { let rd = rd.show_rru(mb_rru); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } + &Inst::LoadAddr { rd, ref mem } => match *mem { + MemArg::FPOffset(fp_off) => { + let alu_op = if fp_off < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) + { + let inst = Inst::AluRRImm12 { + alu_op, + rd, + imm12, + rn: fp_reg(), + }; + inst.show_rru(mb_rru) + } else { + let mut res = String::new(); + let const_insts = + Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); + for inst in const_insts { + res.push_str(&inst.show_rru(mb_rru)); + res.push_str("; "); + } + let inst = Inst::AluRRR { + alu_op, + rd, + rn: fp_reg(), + rm: rd.to_reg(), + }; + res.push_str(&inst.show_rru(mb_rru)); + res + } + } + _ => unimplemented!("{:?}", mem), + }, } } } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 92760a81bb..209b3d6c83 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -22,6 +22,7 @@ use crate::isa::aarch64::AArch64Backend; use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; +use core::convert::TryFrom; use smallvec::SmallVec; //============================================================================ @@ -1546,7 +1547,24 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { }); } - Opcode::StackLoad | Opcode::StackStore | Opcode::StackAddr => { + Opcode::StackAddr => { + let (stack_slot, offset) = match *ctx.data(insn) { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } => (stack_slot, offset), + _ => unreachable!(), + }; + let rd = output_to_reg(ctx, outputs[0]); + let offset: i32 = offset.into(); + let inst = ctx + .abi() + .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); + ctx.emit(inst); + } + + Opcode::StackLoad | Opcode::StackStore => { panic!("Direct stack memory access not supported; should not be used by Wasm"); } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 11a96c58b2..1410f4265b 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -56,6 +56,9 @@ pub trait ABIBody { /// Update with the clobbered registers, post-regalloc. fn set_clobbered(&mut self, clobbered: Set>); + /// Get the address of a stackslot. + fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I; + /// Load from a stackslot. fn load_stackslot( &self, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 0d8fb1ff0e..393c1bdd43 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -31,6 +31,8 @@ pub trait LowerCtx { fn data(&self, ir_inst: Inst) -> &InstructionData; /// Get the controlling type for a polymorphic IR instruction. fn ty(&self, ir_inst: Inst) -> Type; + /// Get the `ABIBody`. + fn abi(&mut self) -> &dyn ABIBody; /// Emit a machine instruction. fn emit(&mut self, mach_inst: Self::I); /// Indicate that an IR instruction has been merged, and so one of its @@ -527,6 +529,10 @@ impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { self.f.dfg.ctrl_typevar(ir_inst) } + fn abi(&mut self) -> &dyn ABIBody { + self.vcode.abi() + } + /// Emit a machine instruction. fn emit(&mut self, mach_inst: I) { self.vcode.push(mach_inst); diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack.clif b/cranelift/filetests/filetests/vcode/aarch64/stack.clif new file mode 100644 index 0000000000..99d60d97ad --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/stack.clif @@ -0,0 +1,115 @@ +test vcode +target aarch64 + +function %stack_addr_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: sub x0, fp, #8 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_addr_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +; FIXME: don't use stack_addr legalization for stack_load and stack_store + +function %stack_load_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: sub x0, fp, #8 +; nextln: ldur x0, [x0] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_load_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 +; nextln: ldur x0, [x0] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_store_small(i64) { +ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: sub x1, fp, #8 +; nextln: stur x0, [x1] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_store_big(i64) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movz x1, #34472; movk x1, #1, LSL #16; sub x1, fp, x1 +; nextln: stur x0, [x1] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index 0b60439f4d..fdea73ecb5 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -150,10 +150,20 @@ cfg_if! { .build() } } - Architecture::Aarch64 {..} => Capstone::new() - .arm64() - .mode(arch::arm64::ArchMode::Arm) - .build(), + Architecture::Aarch64 {..} => { + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build() + .map_err(|err| err.to_string())?; + // AArch64 uses inline constants rather than a separate constant pool right now. + // Without this option, Capstone will stop disassembling as soon as it sees + // an inline constant that is not also a valid instruction. With this option, + // Capstone will print a `.byte` directive with the bytes of the inline constant + // and continue to the next instruction. + cs.set_skipdata(true).map_err(|err| err.to_string())?; + Ok(cs) + } _ => return Err(String::from("Unknown ISA")), };