diff --git a/winch/codegen/src/abi/addressing_mode.rs b/winch/codegen/src/abi/addressing_mode.rs deleted file mode 100644 index 5566e292f6..0000000000 --- a/winch/codegen/src/abi/addressing_mode.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! A generic representation for addressing memory. -use crate::isa::reg::Reg; - -// TODO -// Add the other modes -#[derive(Debug, Copy, Clone)] -pub(crate) enum Address { - Base { base: Reg, imm: u32 }, -} - -impl Address { - pub fn base(base: Reg, imm: u32) -> Address { - Address::Base { base, imm } - } -} diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index 73a856c15f..0877703916 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -3,8 +3,6 @@ use smallvec::SmallVec; use std::ops::{Add, BitAnd, Not, Sub}; use wasmparser::{FuncType, ValType}; -pub(crate) mod addressing_mode; -pub(crate) use addressing_mode::*; pub(crate) mod local; pub(crate) use local::*; diff --git a/winch/codegen/src/isa/aarch64/address.rs b/winch/codegen/src/isa/aarch64/address.rs new file mode 100644 index 0000000000..8024b8ea25 --- /dev/null +++ b/winch/codegen/src/isa/aarch64/address.rs @@ -0,0 +1,144 @@ +//! Aarch64 addressing mode. + +use anyhow::{anyhow, Context, Result}; +use cranelift_codegen::{ + ir::types, + isa::aarch64::inst::{AMode, PairAMode, SImm7Scaled, SImm9}, +}; + +use super::regs; +use crate::reg::Reg; + +/// Aarch64 indexing mode. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum Indexing { + /// Pre-indexed. + Pre, + /// Post-indexed. + Post, +} + +/// Memory address representation. +#[derive(Debug, Copy, Clone)] +pub(crate) enum Address { + /// Base register with an arbitrary offset. Potentially gets + /// lowered into multiple instructions during code emission + /// depending on the offset. + Offset { + /// Base register. + base: Reg, + /// Offset. + offset: i64, + }, + /// Specialized indexed register and offset variant using + /// the stack pointer. + IndexedSPOffset { + /// Offset. + offset: i64, + /// Indexing mode. + indexing: Indexing, + }, +} + +impl Address { + /// Create a pre-indexed addressing mode from the stack pointer. + pub fn pre_indexed_from_sp(offset: i64) -> Self { + Self::IndexedSPOffset { + offset, + indexing: Indexing::Pre, + } + } + + /// Create a post-indexed addressing mode from the stack pointer. + pub fn post_indexed_from_sp(offset: i64) -> Self { + Self::IndexedSPOffset { + offset, + indexing: Indexing::Post, + } + } + + /// Create an offset addressing mode with + /// the shadow stack pointer register + /// as a base. + pub fn from_shadow_sp(offset: i64) -> Self { + Self::Offset { + base: regs::shadow_sp(), + offset, + } + } + + /// Create register and arbitrary offset addressing mode. + pub fn offset(base: Reg, offset: i64) -> Self { + // This exists to enforce the sp vs shadow_sp invariant, the + // sp generally should not be used as a base register in an + // address. In the cases where its usage is required and where + // we are sure that it's 16-byte aligned, the address should + // be constructed via the `Self::pre_indexed_sp` and + // Self::post_indexed_sp functions. + // For more details around the stack pointer and shadow stack + // pointer see the docs at regs::shadow_sp(). + assert!( + base != regs::sp(), + "stack pointer not allowed in arbitrary offset addressing mode" + ); + Self::Offset { base, offset } + } +} + +// Conversions between `winch-codegen`'s addressing mode representation +// and `cranelift-codegen`s addressing mode representation for aarch64. 
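These conversions are fallible: the pre/post-indexed stack-pointer forms must fit Cranelift's `SImm7Scaled`/`SImm9` immediates, and the register-pair mode only accepts the indexed-SP variant. A minimal crate-internal sketch of how the constructors above combine with the `TryFrom` impls below (the `lower_addresses` helper is hypothetical):

```rust
use cranelift_codegen::isa::aarch64::inst::{AMode, PairAMode};

// Assumes `Address` and the `TryFrom` impls below are in scope (same module).
fn lower_addresses() -> anyhow::Result<()> {
    // `stp`/`ldp` style pre-indexed SP address, e.g. [sp, #-16]!.
    let pair: PairAMode = Address::pre_indexed_from_sp(-16).try_into()?;
    // Plain base + offset addressing through the shadow stack pointer (x28).
    let single: AMode = Address::from_shadow_sp(8).try_into()?;
    // Offsets outside the signed scaled 7-bit / 9-bit ranges return an error
    // instead of silently producing a bad encoding.
    let _ = (pair, single);
    Ok(())
}
```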
+ +impl TryFrom<Address>
for PairAMode { + type Error = anyhow::Error; + + fn try_from(addr: Address) -> Result { + use Address::*; + use Indexing::*; + + match addr { + IndexedSPOffset { offset, indexing } => { + let simm7 = SImm7Scaled::maybe_from_i64(offset, types::I64).with_context(|| { + format!("Failed to convert {} to signed scaled 7 bit offset", offset) + })?; + + if indexing == Pre { + Ok(PairAMode::SPPreIndexed(simm7)) + } else { + Ok(PairAMode::SPPostIndexed(simm7)) + } + } + other => Err(anyhow!( + "Could not convert {:?} to addressing mode for register pairs", + other + )), + } + } +} + +impl TryFrom
for AMode { + type Error = anyhow::Error; + + fn try_from(addr: Address) -> Result { + use Address::*; + use Indexing::*; + + match addr { + IndexedSPOffset { offset, indexing } => { + let simm9 = SImm9::maybe_from_i64(offset).ok_or_else(|| { + anyhow!("Failed to convert {} to signed 9-bit offset", offset) + })?; + + if indexing == Pre { + Ok(AMode::SPPreIndexed { simm9 }) + } else { + Ok(AMode::SPPostIndexed { simm9 }) + } + } + Offset { base, offset } => Ok(AMode::RegOffset { + rn: base.into(), + off: offset, + ty: types::I64, + }), + } + } +} diff --git a/winch/codegen/src/isa/aarch64/asm.rs b/winch/codegen/src/isa/aarch64/asm.rs new file mode 100644 index 0000000000..a8eb741db8 --- /dev/null +++ b/winch/codegen/src/isa/aarch64/asm.rs @@ -0,0 +1,244 @@ +//! Assembler library implementation for Aarch64. + +use super::{address::Address, regs}; +use crate::{masm::OperandSize, reg::Reg}; +use cranelift_codegen::{ + ir::MemFlags, + isa::aarch64::inst::{ + self, + emit::{EmitInfo, EmitState}, + ALUOp, AMode, ExtendOp, Imm12, Inst, PairAMode, + }, + settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable, +}; + +/// An Aarch64 instruction operand. +#[derive(Debug)] +pub(crate) enum Operand { + /// Register. + Reg(Reg), + /// Memory address. + Mem(Address), + /// Immediate. + Imm(i32), +} + +// Conversions between winch-codegen aarch64 types and cranelift-codegen +// aarch64 types. + +impl From for inst::OperandSize { + fn from(size: OperandSize) -> Self { + match size { + OperandSize::S32 => Self::Size32, + OperandSize::S64 => Self::Size64, + } + } +} + +/// Low level assembler implementation for Aarch64. +pub(crate) struct Assembler { + /// The machine instruction buffer. + buffer: MachBuffer, + /// Constant emission information. + emit_info: EmitInfo, + /// Emission state. + emit_state: EmitState, +} + +impl Assembler { + /// Create a new Aarch64 assembler. + pub fn new(shared_flags: settings::Flags) -> Self { + Self { + buffer: MachBuffer::::new(), + emit_state: Default::default(), + emit_info: EmitInfo::new(shared_flags), + } + } +} + +impl Assembler { + /// Return the emitted code. + pub fn finalize(self) -> MachBufferFinalized { + let stencil = self.buffer.finish(); + stencil.apply_base_srcloc(Default::default()) + } + + fn emit(&mut self, inst: Inst) { + inst.emit(&[], &mut self.buffer, &self.emit_info, &mut self.emit_state); + } + + /// Load a constant into a register. + pub fn load_constant(&mut self, imm: u64, rd: Reg) { + let writable = Writable::from_reg(rd.into()); + Inst::load_constant(writable, imm, &mut |_| writable) + .into_iter() + .for_each(|i| self.emit(i)); + } + + /// Store a pair of registers. + pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) { + let mem: PairAMode = addr.try_into().unwrap(); + self.emit(Inst::StoreP64 { + rt: xt1.into(), + rt2: xt2.into(), + mem, + flags: MemFlags::trusted(), + }); + } + + /// Store a register. + pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize) { + let mem: AMode = addr.try_into().unwrap(); + let flags = MemFlags::trusted(); + + use OperandSize::*; + let inst = match size { + S64 => Inst::Store64 { + rd: reg.into(), + mem, + flags, + }, + S32 => Inst::Store32 { + rd: reg.into(), + mem, + flags, + }, + }; + + self.emit(inst); + } + + /// Load a register. 
+ pub fn ldr(&mut self, addr: Address, rd: Reg, size: OperandSize) { + use OperandSize::*; + let writable_reg = Writable::from_reg(rd.into()); + let mem: AMode = addr.try_into().unwrap(); + let flags = MemFlags::trusted(); + + let inst = match size { + S64 => Inst::ULoad64 { + rd: writable_reg, + mem, + flags, + }, + S32 => Inst::ULoad32 { + rd: writable_reg, + mem, + flags, + }, + }; + + self.emit(inst); + } + + /// Load a pair of registers. + pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) { + let writable_xt1 = Writable::from_reg(xt1.into()); + let writable_xt2 = Writable::from_reg(xt2.into()); + let mem = addr.try_into().unwrap(); + + self.emit(Inst::LoadP64 { + rt: writable_xt1, + rt2: writable_xt2, + mem, + flags: MemFlags::trusted(), + }); + } + + /// Move instruction combinations. + pub fn mov(&mut self, src: Operand, dst: Operand, size: OperandSize) { + match &(src, dst) { + (Operand::Imm(imm), Operand::Reg(rd)) => { + let scratch = regs::scratch(); + self.load_constant(*imm as u64, scratch); + self.mov_rr(scratch, *rd, size); + } + (Operand::Reg(src), Operand::Reg(rd)) => { + self.mov_rr(*src, *rd, size); + } + + (src, dst) => panic!( + "Invalid combination for mov: src = {:?}, dst = {:?}", + src, dst + ), + } + } + + /// Register to register move. + pub fn mov_rr(&mut self, rm: Reg, rd: Reg, size: OperandSize) { + let writable_rd = Writable::from_reg(rd.into()); + self.emit(Inst::Mov { + size: size.into(), + rd: writable_rd, + rm: rm.into(), + }); + } + + /// Add instruction combinations. + pub fn add(&mut self, opm: Operand, opn: Operand, opd: Operand, size: OperandSize) { + match &(opm, opn, opd) { + (Operand::Imm(imm), Operand::Reg(rn), Operand::Reg(rd)) => { + self.add_ir(*imm as u64, *rn, *rd, size); + } + (Operand::Reg(rm), Operand::Reg(rn), Operand::Reg(rd)) => { + self.emit_alu_rrr_extend(ALUOp::Add, *rm, *rn, *rd, size); + } + (rm, rn, rd) => panic!( + "Invalid combination for add: rm = {:?}, rn = {:?}, rd = {:?}", + rm, rn, rd + ), + } + } + + /// Add immediate and register. + pub fn add_ir(&mut self, imm: u64, rn: Reg, rd: Reg, size: OperandSize) { + let alu_op = ALUOp::Add; + if let Some(imm) = Imm12::maybe_from_u64(imm) { + self.emit_alu_rri(alu_op, imm, rn, rd, size); + } else { + let scratch = regs::scratch(); + self.load_constant(imm, scratch); + self.emit_alu_rrr_extend(alu_op, scratch, rn, rd, size); + } + } + + /// Subtract immediate and register. + pub fn sub_ir(&mut self, imm: u64, rn: Reg, rd: Reg, size: OperandSize) { + let alu_op = ALUOp::Sub; + if let Some(imm) = Imm12::maybe_from_u64(imm) { + self.emit_alu_rri(alu_op, imm, rn, rd, size); + } else { + let scratch = regs::scratch(); + self.load_constant(imm, scratch); + self.emit_alu_rrr_extend(alu_op, scratch, rn, rd, size); + } + } + + /// Return instruction. + pub fn ret(&mut self) { + self.emit(Inst::Ret { rets: vec![] }); + } + + // Helpers for ALU operations. 
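Before the private ALU helpers, a rough sketch of driving this assembler directly; the real driver is the aarch64 `MacroAssembler` later in this change, and the `emit_const_42` helper is hypothetical (it uses crate-internal types):

```rust
use cranelift_codegen::settings;

fn emit_const_42() -> Vec<u8> {
    // Build an assembler with default shared flags.
    let flags = settings::Flags::new(settings::builder());
    let mut asm = Assembler::new(flags);

    // Materialize the constant in x0 and return: roughly `mov x0, #42; ret`.
    asm.load_constant(42, regs::xreg(0));
    asm.ret();

    // Finalize the MachBuffer and copy out the encoded bytes.
    asm.finalize().data().to_vec()
}
```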
+ + fn emit_alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: Reg, size: OperandSize) { + self.emit(Inst::AluRRImm12 { + alu_op: op, + size: size.into(), + rd: Writable::from_reg(rd.into()), + rn: rn.into(), + imm12: imm, + }); + } + + fn emit_alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: Reg, size: OperandSize) { + self.emit(Inst::AluRRRExtend { + alu_op: op, + size: size.into(), + rd: Writable::from_reg(rd.into()), + rn: rn.into(), + rm: rm.into(), + extendop: ExtendOp::UXTX, + }); + } +} diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index ad2c602e8a..40f2313fe0 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1,57 +1,180 @@ +use super::{ + address::Address, + asm::{Assembler, Operand}, + regs, +}; use crate::{ - abi::{addressing_mode::Address, local::LocalSlot}, + abi::local::LocalSlot, isa::reg::Reg, masm::{MacroAssembler as Masm, OperandSize, RegImm}, }; -use cranelift_codegen::{Final, MachBufferFinalized}; +use cranelift_codegen::{settings, Final, MachBufferFinalized}; -#[derive(Default)] -pub(crate) struct MacroAssembler; +/// Aarch64 MacroAssembler. +pub(crate) struct MacroAssembler { + /// Low level assembler. + asm: Assembler, + /// Stack pointer offset. + sp_offset: u32, +} + +// Conversions between generic masm arguments and aarch64 operands. + +impl From for Operand { + fn from(rimm: RegImm) -> Self { + match rimm { + RegImm::Reg(r) => r.into(), + RegImm::Imm(imm) => Operand::Imm(imm), + } + } +} + +impl From for Operand { + fn from(reg: Reg) -> Self { + Operand::Reg(reg) + } +} + +impl From
for Operand { + fn from(addr: Address) -> Self { + Operand::Mem(addr) + } +} + +impl MacroAssembler { + /// Create an Aarch64 MacroAssembler. + pub fn new(shared_flags: settings::Flags) -> Self { + Self { + asm: Assembler::new(shared_flags), + sp_offset: 0u32, + } + } +} impl Masm for MacroAssembler { + type Address = Address; + fn prologue(&mut self) { - todo!() + let lr = regs::lr(); + let fp = regs::fp(); + let sp = regs::sp(); + let addr = Address::pre_indexed_from_sp(-16); + + self.asm.stp(fp, lr, addr); + self.asm.mov_rr(sp, fp, OperandSize::S64); + self.move_sp_to_shadow_sp(); } - fn epilogue(&mut self, _locals_size: u32) { - todo!() + fn epilogue(&mut self, locals_size: u32) { + assert!(self.sp_offset == locals_size); + + let sp = regs::sp(); + if locals_size > 0 { + self.asm + .add_ir(locals_size as u64, sp, sp, OperandSize::S64); + self.move_sp_to_shadow_sp(); + } + + let lr = regs::lr(); + let fp = regs::fp(); + let addr = Address::post_indexed_from_sp(16); + + self.asm.ldp(fp, lr, addr); + self.asm.ret(); } - fn reserve_stack(&mut self, _bytes: u32) { - todo!() + fn reserve_stack(&mut self, bytes: u32) { + if bytes == 0 { + return; + } + + let sp = regs::sp(); + self.asm.sub_ir(bytes as u64, sp, sp, OperandSize::S64); + self.move_sp_to_shadow_sp(); + + self.increment_sp(bytes); } - fn local_address(&mut self, _local: &LocalSlot) -> Address { - todo!() + fn local_address(&mut self, local: &LocalSlot) -> Address { + let (reg, offset) = local + .addressed_from_sp() + .then(|| { + let offset = self.sp_offset.checked_sub(local.offset).expect(&format!( + "Invalid local offset = {}; sp offset = {}", + local.offset, self.sp_offset + )); + (regs::shadow_sp(), offset) + }) + .unwrap_or((regs::fp(), local.offset)); + + Address::offset(reg, offset as i64) } - fn store(&mut self, _src: RegImm, _dst: Address, _size: OperandSize) { - todo!() + fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) { + let src = match src { + RegImm::Imm(imm) => { + let scratch = regs::scratch(); + self.asm.load_constant(imm as u64, scratch); + scratch + } + RegImm::Reg(reg) => reg, + }; + + self.asm.str(src, dst, size); } - fn load(&mut self, _src: Address, _dst: Reg, _size: OperandSize) {} + fn load(&mut self, src: Address, dst: Reg, size: OperandSize) { + self.asm.ldr(src, dst, size); + } fn sp_offset(&mut self) -> u32 { - 0u32 + self.sp_offset } fn finalize(self) -> MachBufferFinalized { - todo!() + self.asm.finalize() } - fn mov(&mut self, _src: RegImm, _dst: RegImm, _size: OperandSize) { - todo!() + fn mov(&mut self, src: RegImm, dst: RegImm, size: OperandSize) { + self.asm.mov(src.into(), dst.into(), size); } - fn add(&mut self, _dst: RegImm, __lhs: RegImm, __rhs: RegImm, _size: OperandSize) { - todo!() + fn add(&mut self, dst: RegImm, lhs: RegImm, rhs: RegImm, size: OperandSize) { + self.asm.add(rhs.into(), lhs.into(), dst.into(), size); } - fn zero(&mut self, _reg: Reg) { - todo!() + fn zero(&mut self, reg: Reg) { + self.asm.load_constant(0, reg); } - fn push(&mut self, _reg: Reg) -> u32 { - todo!() + fn push(&mut self, reg: Reg) -> u32 { + // The push is counted as pushing the 64-bit width in + // 64-bit architectures. 
+ let size = 8u32; + self.reserve_stack(size); + let address = Address::from_shadow_sp(size as i64); + self.asm.str(reg, address, OperandSize::S64); + + self.sp_offset + } +} + +impl MacroAssembler { + fn increment_sp(&mut self, bytes: u32) { + self.sp_offset += bytes; + } + + // Copies the value of the stack pointer to the shadow stack + // pointer: mov x28, sp + + // This function is usually called whenever the real stack pointer + // changes, for example after allocating or deallocating stack + // space, or after performing a push or pop. + // For more details around the stack pointer and shadow stack + // pointer see the docs at regs::shadow_sp(). + fn move_sp_to_shadow_sp(&mut self) { + let sp = regs::sp(); + let shadow_sp = regs::shadow_sp(); + self.asm.mov_rr(sp, shadow_sp, OperandSize::S64); } } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index 8865afedee..7e62e1aeea 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -1,12 +1,25 @@ -use crate::isa::{Builder, TargetIsa}; +use self::regs::{scratch, ALL_GPR}; +use crate::{ + abi::ABI, + codegen::{CodeGen, CodeGenContext}, + frame::Frame, + isa::{Builder, TargetIsa}, + masm::MacroAssembler, + regalloc::RegAlloc, + regset::RegSet, + stack::Stack, +}; use anyhow::Result; use cranelift_codegen::{ isa::aarch64::settings as aarch64_settings, settings::Flags, Final, MachBufferFinalized, }; +use masm::MacroAssembler as Aarch64Masm; use target_lexicon::Triple; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; mod abi; +mod address; +mod asm; mod masm; mod regs; @@ -36,7 +49,7 @@ pub(crate) struct Aarch64 { } impl Aarch64 { - /// Create a Aarch64 ISA. + /// Create an Aarch64 ISA. pub fn new(triple: Triple, shared_flags: Flags, isa_flags: aarch64_settings::Flags) -> Self { Self { isa_flags, @@ -57,10 +70,22 @@ impl TargetIsa for Aarch64 { fn compile_function( &self, - _sig: &FuncType, - _body: &FunctionBody, - mut _validator: FuncValidator, + sig: &FuncType, + body: &FunctionBody, + mut validator: FuncValidator, ) -> Result> { - todo!() + let mut body = body.get_binary_reader(); + let mut masm = Aarch64Masm::new(self.shared_flags.clone()); + let stack = Stack::new(); + let abi = abi::Aarch64ABI::default(); + let abi_sig = abi.sig(sig); + let frame = Frame::new(&abi_sig, &mut body, &mut validator, &abi)?; + // TODO: Add floating point bitmask + let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch()); + let codegen_context = CodeGenContext::new(&mut masm, stack, &frame); + let mut codegen = CodeGen::new::(codegen_context, abi_sig, regalloc); + + codegen.emit(&mut body, validator)?; + Ok(masm.finalize()) } } diff --git a/winch/codegen/src/isa/aarch64/regs.rs b/winch/codegen/src/isa/aarch64/regs.rs index 971e7f10bc..4c827a0b85 100644 --- a/winch/codegen/src/isa/aarch64/regs.rs +++ b/winch/codegen/src/isa/aarch64/regs.rs @@ -1,11 +1,11 @@ -//! AArch64 register definition +//! AArch64 register definition. use crate::isa::reg::Reg; use regalloc2::{PReg, RegClass}; /// Construct a X-register from an index. pub(crate) const fn xreg(num: u8) -> Reg { - assert!(num < 31); + assert!(num < 32); Reg::new(PReg::new(num as usize, RegClass::Int)) } @@ -14,3 +14,124 @@ pub(crate) const fn vreg(num: u8) -> Reg { assert!(num < 32); Reg::new(PReg::new(num as usize, RegClass::Float)) } + +/// Scratch register. +/// Intra-procedure-call corruptible register. +pub(crate) const fn ip0() -> Reg { + xreg(16) +} + +/// Alias to the IP0 register. 
+pub(crate) const fn scratch() -> Reg { + ip0() +} + +/// Scratch register. +/// Intra-procedure-call corruptible register. +pub(crate) const fn ip1() -> Reg { + xreg(17) +} + +/// Register used to carry platform state. +const fn platform() -> Reg { + xreg(18) +} + +/// Frame pointer register. +pub(crate) const fn fp() -> Reg { + xreg(29) +} + +/// Link register for function calls. +pub(crate) const fn lr() -> Reg { + xreg(30) +} + +/// Zero register. +pub(crate) const fn zero() -> Reg { + xreg(31) +} + +/// Stack pointer register. +/// +/// In aarch64 the zero and stack pointer registers are contextually +/// different but have the same hardware encoding; to differentiate +/// them, we are following Cranelift's encoding and representing it as +/// 31 + 32. Ref: +/// https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/codegen/src/isa/aarch64/inst/regs.rs#L70 +pub(crate) const fn sp() -> Reg { + Reg::new(PReg::new(31 + 32, RegClass::Int)) +} + +/// Shadow stack pointer register. +/// +/// The shadow stack pointer is used as the base for memory addressing +/// to workaround Aarch64's constraint on the stack pointer 16-byte +/// alignment for memory addressing. This allows word-size loads and +/// stores. It's always assumed that the real stack pointer is +/// 16-byte unaligned; the only exceptions to this assumption are the function +/// prologue and epilogue in which we use the real stack pointer for +/// addressing, assuming that the 16-byte alignment is respected. +/// +/// The fact that the shadow stack pointer is used for memory +/// addressing, doesn't change the meaning of the real stack pointer, +/// which should always be used to allocate and deallocate stack +/// space. The real stack pointer is always treated as "primary". +/// Throughout the code generation any change to the stack pointer is +/// reflected in the shadow stack pointer via the +/// [MacroAssembler::move_sp_to_shadow_sp] function. +/// +/// This approach, requires copying the real stack pointer value into +/// x28 everytime the real stack pointer moves, which involves +/// emitting one more instruction. For example, this is generally how +/// the real stack pointer and x28 will look like during a function: +/// +/// +-----------+ +/// | | Save x28 (callee-saved) +/// +-----------+----- SP at function entry (after epilogue, slots for FP and LR) +/// | | Copy the value of SP to x28 +/// | | +/// +-----------+----- SP after reserving stack space for locals and arguments +/// | | Copy the value of SP to x28 +/// | | +/// +-----------+----- SP after a push +/// | | Copy the value of SP to x28 (similar after a pop) +/// | | +/// | | +/// | | +/// | | +/// +-----------+----- At epilogue restore x28 (callee-saved) +/// +-----------+ +/// +/// In summary, the following invariants must be respected: +/// +/// * The real stack pointer is always primary, and must be used to +/// allocate and deallocate stack space(e.g. push, pop). This +/// operation must always be followed by a copy of the real stack +/// pointer to x28. +/// * The real stack pointer must never be used to +/// address memory except when we are certain that the required +/// alignment is respected (e.g. during the prologue and epilogue) +/// * The value of the real stack pointer is copied to x28 when +/// entering a function. +/// * The value of x28 doesn't change between +/// function calls (as it's callee saved), compliant with +/// Aarch64's ABI. +/// * x28 is not available during register allocation. 
+/// * Since the real stack pointer is always primary, there's no need +/// to copy the shadow stack pointer into the real stack +/// pointer. The copy is only done SP -> Shadow SP direction. +pub(crate) const fn shadow_sp() -> Reg { + xreg(28) +} + +const NON_ALLOCATABLE_GPR: u32 = (1 << ip0().hw_enc()) + | (1 << ip1().hw_enc()) + | (1 << platform().hw_enc()) + | (1 << fp().hw_enc()) + | (1 << lr().hw_enc()) + | (1 << zero().hw_enc()) + | (1 << shadow_sp().hw_enc()); + +/// Bitmask to represent the available general purpose registers. +pub(crate) const ALL_GPR: u32 = u32::MAX & !NON_ALLOCATABLE_GPR; diff --git a/winch/codegen/src/isa/reg.rs b/winch/codegen/src/isa/reg.rs index c7e47897e2..8f933aedac 100644 --- a/winch/codegen/src/isa/reg.rs +++ b/winch/codegen/src/isa/reg.rs @@ -28,7 +28,7 @@ impl Reg { } /// Get the encoding of the underlying register. - pub fn hw_enc(self) -> u8 { + pub const fn hw_enc(self) -> u8 { self.0.hw_enc() as u8 } diff --git a/winch/codegen/src/isa/x64/address.rs b/winch/codegen/src/isa/x64/address.rs new file mode 100644 index 0000000000..4a3e26ebc3 --- /dev/null +++ b/winch/codegen/src/isa/x64/address.rs @@ -0,0 +1,17 @@ +//! x64 addressing mode. + +use crate::reg::Reg; + +/// Memory address representation. +#[derive(Debug, Copy, Clone)] +pub(crate) enum Address { + /// Base register with an immediate offset. + Offset { base: Reg, offset: u32 }, +} + +impl Address { + /// Create an offset + pub fn offset(base: Reg, offset: u32) -> Self { + Self::Offset { base, offset } + } +} diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index f20c88da92..6734a35d4e 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -1,6 +1,6 @@ //! Assembler library implementation for x64. -use crate::{abi::Address, isa::reg::Reg, masm::OperandSize}; +use crate::{isa::reg::Reg, masm::OperandSize}; use cranelift_codegen::{ isa::x64::{ args::{ @@ -12,6 +12,8 @@ use cranelift_codegen::{ settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, Writable, }; +use super::address::Address; + /// A x64 instruction operand. 
#[derive(Debug, Copy, Clone)] pub(crate) enum Operand { @@ -95,14 +97,16 @@ impl Assembler { match &(src, dst) { (Reg(lhs), Reg(rhs)) => self.mov_rr(*lhs, *rhs, size), (Reg(lhs), Mem(addr)) => match addr { - Address::Base { base, imm } => self.mov_rm(*lhs, *base, *imm, size), + Address::Offset { base, offset: imm } => self.mov_rm(*lhs, *base, *imm, size), }, (Imm(imm), Mem(addr)) => match addr { - Address::Base { base, imm: disp } => self.mov_im(*imm as u64, *base, *disp, size), + Address::Offset { base, offset: disp } => { + self.mov_im(*imm as u64, *base, *disp, size) + } }, (Imm(imm), Reg(reg)) => self.mov_ir(*imm as u64, *reg, size), (Mem(addr), Reg(reg)) => match addr { - Address::Base { base, imm } => self.mov_mr(*base, *imm, *reg, size), + Address::Offset { base, offset: imm } => self.mov_mr(*base, *imm, *reg, size), }, _ => panic!( diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 3f73aaf64b..beb1481b50 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -1,8 +1,9 @@ use super::{ + address::Address, asm::{Assembler, Operand}, regs::{rbp, rsp}, }; -use crate::abi::{Address, LocalSlot}; +use crate::abi::LocalSlot; use crate::isa::reg::Reg; use crate::masm::{MacroAssembler as Masm, OperandSize, RegImm}; use cranelift_codegen::{isa::x64::settings as x64_settings, settings, Final, MachBufferFinalized}; @@ -39,6 +40,8 @@ impl From
for Operand { } impl Masm for MacroAssembler { + type Address = Address; + fn prologue(&mut self) { let frame_pointer = rbp(); let stack_pointer = rsp(); @@ -79,7 +82,7 @@ impl Masm for MacroAssembler { }) .unwrap_or((rbp(), local.offset)); - Address::base(reg, offset) + Address::offset(reg, offset) } fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) { diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 7412f9444a..3f72c2c5f0 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -19,6 +19,7 @@ use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; use self::regs::ALL_GPR; mod abi; +mod address; mod asm; mod masm; // Not all the fpr and gpr constructors are used at the moment; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 4e12358353..030c56cc66 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1,9 +1,8 @@ -use crate::abi::align_to; -use crate::abi::{Address, LocalSlot}; +use crate::abi::{align_to, LocalSlot}; use crate::isa::reg::Reg; use crate::regalloc::RegAlloc; use cranelift_codegen::{Final, MachBufferFinalized}; -use std::ops::Range; +use std::{fmt::Debug, ops::Range}; /// Operand size, in bits. #[derive(Copy, Clone, Eq, PartialEq)] @@ -59,6 +58,9 @@ impl From for RegImm { /// where needed, in the case of architectures that use a two-argument form. pub(crate) trait MacroAssembler { + /// The addressing mode. + type Address; + /// Emit the function prologue. fn prologue(&mut self); @@ -69,16 +71,16 @@ pub(crate) trait MacroAssembler { fn reserve_stack(&mut self, bytes: u32); /// Get the address of a local slot. - fn local_address(&mut self, local: &LocalSlot) -> Address; + fn local_address(&mut self, local: &LocalSlot) -> Self::Address; /// Get stack pointer offset. fn sp_offset(&mut self) -> u32; /// Perform a stack store. - fn store(&mut self, src: RegImm, dst: Address, size: OperandSize); + fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize); /// Perform a stack load. - fn load(&mut self, src: Address, dst: Reg, size: OperandSize); + fn load(&mut self, src: Self::Address, dst: Reg, size: OperandSize); /// Perform a move. fn mov(&mut self, src: RegImm, dst: RegImm, size: OperandSize); @@ -111,7 +113,7 @@ pub(crate) trait MacroAssembler { // Ensure that the start of the range is at least 4-byte aligned. assert!(mem.start % 4 == 0); let start = align_to(mem.start, word_size); - let addr = self.local_address(&LocalSlot::i32(start)); + let addr: Self::Address = self.local_address(&LocalSlot::i32(start)); self.store(RegImm::imm(0), addr, OperandSize::S32); // Ensure that the new start of the range, is word-size aligned. 
assert!(start % word_size == 0); @@ -123,7 +125,7 @@ pub(crate) trait MacroAssembler { if slots == 1 { let slot = LocalSlot::i64(start + word_size); - let addr = self.local_address(&slot); + let addr: Self::Address = self.local_address(&slot); self.store(RegImm::imm(0), addr, OperandSize::S64); } else { // TODO @@ -136,7 +138,7 @@ pub(crate) trait MacroAssembler { for step in (start..end).into_iter().step_by(word_size as usize) { let slot = LocalSlot::i64(step + word_size); - let addr = self.local_address(&slot); + let addr: Self::Address = self.local_address(&slot); self.store(zero, addr, OperandSize::S64); } } diff --git a/winch/filetests/filetests/aarch64/basic_add.wat b/winch/filetests/filetests/aarch64/basic_add.wat new file mode 100644 index 0000000000..8c8e9f84d6 --- /dev/null +++ b/winch/filetests/filetests/aarch64/basic_add.wat @@ -0,0 +1,18 @@ +;;! target = "aarch64" + +(module + (export "main" (func $main)) + + (func $main (result i32) + (i32.const 10) + (i32.const 20) + i32.add) +) +;; 0: fd7bbfa9 stp x29, x30, [sp, #-0x10]! +;; 4: fd030091 mov x29, sp +;; 8: fc030091 mov x28, sp +;; c: 500180d2 mov x16, #0xa +;; 10: e003102a mov w0, w16 +;; 14: 00500011 add w0, w0, #0x14 +;; 18: fd7bc1a8 ldp x29, x30, [sp], #0x10 +;; 1c: c0035fd6 ret diff --git a/winch/filetests/filetests/aarch64/basic_add_with_locals.wat b/winch/filetests/filetests/aarch64/basic_add_with_locals.wat new file mode 100644 index 0000000000..4d9f5577ee --- /dev/null +++ b/winch/filetests/filetests/aarch64/basic_add_with_locals.wat @@ -0,0 +1,38 @@ +;;! target = "aarch64" + +(module + (export "main" (func $main)) + + (func $main (result i32) + (local $foo i32) + (local $bar i32) + (i32.const 10) + (local.set $foo) + (i32.const 20) + (local.set $bar) + + (local.get $foo) + (local.get $bar) + i32.add) +) +;; 0: fd7bbfa9 stp x29, x30, [sp, #-0x10]! +;; 4: fd030091 mov x29, sp +;; 8: fc030091 mov x28, sp +;; c: ff2300d1 sub sp, sp, #8 +;; 10: fc030091 mov x28, sp +;; 14: 100080d2 mov x16, #0 +;; 18: 900300f8 stur x16, [x28] +;; 1c: 500180d2 mov x16, #0xa +;; 20: e003102a mov w0, w16 +;; 24: 804300b8 stur w0, [x28, #4] +;; 28: 900280d2 mov x16, #0x14 +;; 2c: e003102a mov w0, w16 +;; 30: 800300b8 stur w0, [x28] +;; 34: 800340b8 ldur w0, [x28] +;; 38: 814340b8 ldur w1, [x28, #4] +;; 3c: 2160200b add w1, w1, w0, uxtx +;; 40: e00301aa mov x0, x1 +;; 44: ff230091 add sp, sp, #8 +;; 48: fc030091 mov x28, sp +;; 4c: fd7bc1a8 ldp x29, x30, [sp], #0x10 +;; 50: c0035fd6 ret diff --git a/winch/filetests/filetests/aarch64/basic_add_with_params.wat b/winch/filetests/filetests/aarch64/basic_add_with_params.wat new file mode 100644 index 0000000000..3f8fff92ad --- /dev/null +++ b/winch/filetests/filetests/aarch64/basic_add_with_params.wat @@ -0,0 +1,25 @@ +;;! target = "aarch64" + +(module + (export "main" (func $main)) + + (func $main (param i32) (param i32) (result i32) + (local.get 0) + (local.get 1) + i32.add) +) +;; 0: fd7bbfa9 stp x29, x30, [sp, #-0x10]! 
+;; 4: fd030091 mov x29, sp +;; 8: fc030091 mov x28, sp +;; c: ff2300d1 sub sp, sp, #8 +;; 10: fc030091 mov x28, sp +;; 14: 804300b8 stur w0, [x28, #4] +;; 18: 810300b8 stur w1, [x28] +;; 1c: 800340b8 ldur w0, [x28] +;; 20: 814340b8 ldur w1, [x28, #4] +;; 24: 2160200b add w1, w1, w0, uxtx +;; 28: e00301aa mov x0, x1 +;; 2c: ff230091 add sp, sp, #8 +;; 30: fc030091 mov x28, sp +;; 34: fd7bc1a8 ldp x29, x30, [sp], #0x10 +;; 38: c0035fd6 ret diff --git a/winch/filetests/filetests/aarch64/simple.wat b/winch/filetests/filetests/aarch64/simple.wat new file mode 100644 index 0000000000..a55aae1e96 --- /dev/null +++ b/winch/filetests/filetests/aarch64/simple.wat @@ -0,0 +1,15 @@ +;;! target = "aarch64" + +(module + (func (result i32) + (i32.const 42) + ) +) + +;; 0: fd7bbfa9 stp x29, x30, [sp, #-0x10]! +;; 4: fd030091 mov x29, sp +;; 8: fc030091 mov x28, sp +;; c: 500580d2 mov x16, #0x2a +;; 10: e00310aa mov x0, x16 +;; 14: fd7bc1a8 ldp x29, x30, [sp], #0x10 +;; 18: c0035fd6 ret diff --git a/winch/filetests/src/disasm.rs b/winch/filetests/src/disasm.rs index ee75e49978..8b816430ec 100644 --- a/winch/filetests/src/disasm.rs +++ b/winch/filetests/src/disasm.rs @@ -52,6 +52,18 @@ fn disassembler_for(isa: &dyn TargetIsa) -> Result { .build() .map_err(|e| anyhow::format_err!("{}", e))?, + Architecture::Aarch64 { .. } => { + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build() + .map_err(|e| anyhow::format_err!("{}", e))?; + + cs.set_skipdata(true) + .map_err(|e| anyhow::format_err!("{}", e))?; + cs + } + _ => bail!("Unsupported ISA"), };
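For reference, a standalone sketch of how the aarch64 branch above can be exercised to print listings like the ones in the new filetests; the `dump_aarch64` helper is hypothetical, but the `capstone` calls mirror the ones added here:

```rust
use capstone::prelude::*;

fn dump_aarch64(code: &[u8]) -> anyhow::Result<()> {
    let mut cs = Capstone::new()
        .arm64()
        .mode(arch::arm64::ArchMode::Arm)
        .build()
        .map_err(|e| anyhow::format_err!("{}", e))?;
    // Skip over data/padding islands instead of stopping at them.
    cs.set_skipdata(true)
        .map_err(|e| anyhow::format_err!("{}", e))?;

    let insns = cs
        .disasm_all(code, 0x0)
        .map_err(|e| anyhow::format_err!("{}", e))?;
    for inst in insns.iter() {
        println!(
            "{:>4x}: {} {}",
            inst.address(),
            inst.mnemonic().unwrap_or(""),
            inst.op_str().unwrap_or("")
        );
    }
    Ok(())
}
```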