From 1bee1af7550980b3b862d1ba009d2f830e842ddd Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 15 Apr 2020 18:28:31 +0200 Subject: [PATCH 1/5] Implement stack_addr for AArch64 --- cranelift/codegen/src/isa/aarch64/abi.rs | 10 +++++ .../codegen/src/isa/aarch64/inst/emit.rs | 37 ++++++++++++++++++- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 22 +++++++++++ cranelift/codegen/src/isa/aarch64/lower.rs | 20 +++++++++- cranelift/codegen/src/machinst/abi.rs | 3 ++ cranelift/codegen/src/machinst/lower.rs | 6 +++ 6 files changed, 95 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 88aa60f8af..131bd6ee42 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -497,6 +497,16 @@ impl ABIBody for AArch64ABIBody { store_stack(fp_off, from_reg, ty) } + fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Inst { + // Offset from beginning of stackslot area, which is at FP - stackslots_size. + let stack_off = self.stackslots[slot.as_u32() as usize] as i64; + let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); + Inst::LoadAddr { + rd: into_reg, + mem: MemArg::FPOffset(fp_off), + } + } + // Load from a spillslot. fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { // Note that when spills/fills are generated, we don't yet know how many diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index f01746543c..644785b1fd 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -6,11 +6,10 @@ use crate::ir::types::*; use crate::ir::TrapCode; use crate::isa::aarch64::inst::*; -use core::convert::TryFrom; - use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; +use core::convert::TryFrom; /// Memory label/reference finalization: convert a MemLabel to a PC-relative /// offset, possibly emitting relocation(s) as necessary. @@ -1275,6 +1274,40 @@ impl MachInstEmit for Inst { sink.add_reloc(srcloc, Reloc::Abs8, name, offset); sink.put8(0); } + &Inst::LoadAddr { rd, ref mem } => match *mem { + MemArg::FPOffset(fp_off) => { + let alu_op = if fp_off < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) + { + let inst = Inst::AluRRImm12 { + alu_op, + rd, + imm12, + rn: fp_reg(), + }; + inst.emit(sink); + } else { + let tmp = writable_spilltmp_reg(); + let const_insts = + Inst::load_constant(tmp, u64::try_from(fp_off.abs()).unwrap()); + for inst in const_insts { + inst.emit(sink); + } + let inst = Inst::AluRRR { + alu_op, + rd, + rn: fp_reg(), + rm: tmp.to_reg(), + }; + inst.emit(sink); + } + } + _ => unimplemented!("{:?}", mem), + }, } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ee55f3394f..6775f98b2d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -707,6 +707,12 @@ pub enum Inst { srcloc: SourceLoc, offset: i64, }, + + /// Load address referenced by `mem` into `rd`. + LoadAddr { + rd: Writable, + mem: MemArg, + }, } fn count_zero_half_words(mut value: u64) -> usize { @@ -1089,6 +1095,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => { collector.add_def(rd); } + &Inst::LoadAddr { rd, mem: _ } => { + collector.add_def(rd); + } } } @@ -1643,6 +1652,13 @@ fn aarch64_map_regs( &mut Inst::LoadExtName { ref mut rd, .. } => { map_wr(d, rd); } + &mut Inst::LoadAddr { + ref mut rd, + ref mut mem, + } => { + map_wr(d, rd); + map_mem(u, mem); + } } } @@ -2536,6 +2552,12 @@ impl ShowWithRRU for Inst { let rd = rd.show_rru(mb_rru); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } + &Inst::LoadAddr { rd, ref mem } => { + let rd = rd.show_rru(mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}load_addr {}, {}", mem_str, rd, mem) + } } } } diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 92760a81bb..209b3d6c83 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -22,6 +22,7 @@ use crate::isa::aarch64::AArch64Backend; use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; +use core::convert::TryFrom; use smallvec::SmallVec; //============================================================================ @@ -1546,7 +1547,24 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) { }); } - Opcode::StackLoad | Opcode::StackStore | Opcode::StackAddr => { + Opcode::StackAddr => { + let (stack_slot, offset) = match *ctx.data(insn) { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } => (stack_slot, offset), + _ => unreachable!(), + }; + let rd = output_to_reg(ctx, outputs[0]); + let offset: i32 = offset.into(); + let inst = ctx + .abi() + .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); + ctx.emit(inst); + } + + Opcode::StackLoad | Opcode::StackStore => { panic!("Direct stack memory access not supported; should not be used by Wasm"); } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 11a96c58b2..1410f4265b 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -56,6 +56,9 @@ pub trait ABIBody { /// Update with the clobbered registers, post-regalloc. fn set_clobbered(&mut self, clobbered: Set>); + /// Get the address of a stackslot. + fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I; + /// Load from a stackslot. fn load_stackslot( &self, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 0d8fb1ff0e..393c1bdd43 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -31,6 +31,8 @@ pub trait LowerCtx { fn data(&self, ir_inst: Inst) -> &InstructionData; /// Get the controlling type for a polymorphic IR instruction. fn ty(&self, ir_inst: Inst) -> Type; + /// Get the `ABIBody`. + fn abi(&mut self) -> &dyn ABIBody; /// Emit a machine instruction. fn emit(&mut self, mach_inst: Self::I); /// Indicate that an IR instruction has been merged, and so one of its @@ -527,6 +529,10 @@ impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { self.f.dfg.ctrl_typevar(ir_inst) } + fn abi(&mut self) -> &dyn ABIBody { + self.vcode.abi() + } + /// Emit a machine instruction. fn emit(&mut self, mach_inst: I) { self.vcode.push(mach_inst); From 4960c9a0c6f2eff4e49647a82ba04891c1b15b52 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Thu, 16 Apr 2020 21:17:27 +0200 Subject: [PATCH 2/5] Add tests for stack_{addr,load,store} --- .../filetests/vcode/aarch64/stack.clif | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 cranelift/filetests/filetests/vcode/aarch64/stack.clif diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack.clif b/cranelift/filetests/filetests/vcode/aarch64/stack.clif new file mode 100644 index 0000000000..0b503c3aa5 --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/stack.clif @@ -0,0 +1,115 @@ +test vcode +target aarch64 + +function %stack_addr_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: load_addr x0, [fp, #-8] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_addr_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x0, [x15] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +; FIXME: don't use stack_addr legalization for stack_load and stack_store + +function %stack_load_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: load_addr x0, [fp, #-8] +; nextln: ldur x0, [x0] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_load_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x0, [x15] +; nextln: ldur x0, [x0] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_store_small(i64) { +ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #16 +; nextln: load_addr x1, [fp, #-8] +; nextln: stur x0, [x1] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + + +function %stack_store_big(i64) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldr x15, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x15, UXTX +; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x1, [x15] +; nextln: stur x0, [x1] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret From 259de864e41cd19929546b63f56f24d823036ca1 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 17 Apr 2020 11:50:00 +0200 Subject: [PATCH 3/5] Reuse rd as tmp reg in LoadAddr --- cranelift/codegen/src/isa/aarch64/inst/emit.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 644785b1fd..9d9d98d7c8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1291,9 +1291,8 @@ impl MachInstEmit for Inst { }; inst.emit(sink); } else { - let tmp = writable_spilltmp_reg(); let const_insts = - Inst::load_constant(tmp, u64::try_from(fp_off.abs()).unwrap()); + Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); for inst in const_insts { inst.emit(sink); } @@ -1301,7 +1300,7 @@ impl MachInstEmit for Inst { alu_op, rd, rn: fp_reg(), - rm: tmp.to_reg(), + rm: rd.to_reg(), }; inst.emit(sink); } From cb1c9ef08509c0c475bcbc1d7e7374e6ce60d638 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 17 Apr 2020 11:50:40 +0200 Subject: [PATCH 4/5] Fix printing of LoadAddr --- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 43 ++++++++++++++++--- .../filetests/vcode/aarch64/stack.clif | 12 +++--- cranelift/src/disasm.rs | 14 ++++-- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 6775f98b2d..90fce9c5fa 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -13,6 +13,7 @@ use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, W use regalloc::{RegUsageCollector, Set}; use alloc::vec::Vec; +use core::convert::TryFrom; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -2552,12 +2553,42 @@ impl ShowWithRRU for Inst { let rd = rd.show_rru(mb_rru); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } - &Inst::LoadAddr { rd, ref mem } => { - let rd = rd.show_rru(mb_rru); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); - let mem = mem.show_rru(mb_rru); - format!("{}load_addr {}, {}", mem_str, rd, mem) - } + &Inst::LoadAddr { rd, ref mem } => match *mem { + MemArg::FPOffset(fp_off) => { + let alu_op = if fp_off < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) + { + let inst = Inst::AluRRImm12 { + alu_op, + rd, + imm12, + rn: fp_reg(), + }; + inst.show_rru(mb_rru) + } else { + let mut res = String::new(); + let const_insts = + Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); + for inst in const_insts { + res.push_str(&inst.show_rru(mb_rru)); + res.push_str("; "); + } + let inst = Inst::AluRRR { + alu_op, + rd, + rn: fp_reg(), + rm: rd.to_reg(), + }; + res.push_str(&inst.show_rru(mb_rru)); + res + } + } + _ => unimplemented!("{:?}", mem), + }, } } } diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack.clif b/cranelift/filetests/filetests/vcode/aarch64/stack.clif index 0b503c3aa5..99d60d97ad 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack.clif @@ -12,7 +12,7 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: load_addr x0, [fp, #-8] +; nextln: sub x0, fp, #8 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -31,7 +31,7 @@ block0: ; nextln: mov fp, sp ; nextln: ldr x15, 8 ; b 12 ; data 100016 ; nextln: sub sp, sp, x15, UXTX -; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x0, [x15] +; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -50,7 +50,7 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: load_addr x0, [fp, #-8] +; nextln: sub x0, fp, #8 ; nextln: ldur x0, [x0] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -70,7 +70,7 @@ block0: ; nextln: mov fp, sp ; nextln: ldr x15, 8 ; b 12 ; data 100016 ; nextln: sub sp, sp, x15, UXTX -; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x0, [x15] +; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 ; nextln: ldur x0, [x0] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -88,7 +88,7 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: load_addr x1, [fp, #-8] +; nextln: sub x1, fp, #8 ; nextln: stur x0, [x1] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -108,7 +108,7 @@ block0(v0: i64): ; nextln: mov fp, sp ; nextln: ldr x15, 8 ; b 12 ; data 100016 ; nextln: sub sp, sp, x15, UXTX -; nextln: movn x15, #34471 ; movk x15, #65534, LSL #16 ; add x15, x15, fp ; load_addr x1, [x15] +; nextln: movz x1, #34472; movk x1, #1, LSL #16; sub x1, fp, x1 ; nextln: stur x0, [x1] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index 0b60439f4d..3977be69f3 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -150,10 +150,16 @@ cfg_if! { .build() } } - Architecture::Aarch64 {..} => Capstone::new() - .arm64() - .mode(arch::arm64::ArchMode::Arm) - .build(), + Architecture::Aarch64 {..} => { + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build() + .map_err(|err| err.to_string())?; + // Inline constants should be skipped + cs.set_skipdata(true).map_err(|err| err.to_string())?; + Ok(cs) + } _ => return Err(String::from("Unknown ISA")), }; From 3528c9e00fd4d31f5746e36448e167603a99adb0 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 17 Apr 2020 19:44:19 +0200 Subject: [PATCH 5/5] Expand comment about set_skipdata --- cranelift/src/disasm.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index 3977be69f3..fdea73ecb5 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -156,7 +156,11 @@ cfg_if! { .mode(arch::arm64::ArchMode::Arm) .build() .map_err(|err| err.to_string())?; - // Inline constants should be skipped + // AArch64 uses inline constants rather than a separate constant pool right now. + // Without this option, Capstone will stop disassembling as soon as it sees + // an inline constant that is not also a valid instruction. With this option, + // Capstone will print a `.byte` directive with the bytes of the inline constant + // and continue to the next instruction. cs.set_skipdata(true).map_err(|err| err.to_string())?; Ok(cs) }