Register allocation V2

This lays the groundwork for other on-the-fly optimisations, such as
passing literals through so that constant folding can be done in
linear time while compiling.
Jef
2018-12-13 16:05:24 +01:00
parent 4994e3671c
commit 17ecd049a1
4 changed files with 590 additions and 229 deletions
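
The const-folding idea mentioned in the message is only hinted at in the diff below (see the `// TODO: Immediates?` and `// TODO: Don't store literals at all, roll them into Value` comments). As a rough, hypothetical sketch of what that could look like — the `Imm` variant and the `fold_i32_add` helper are assumptions for illustration, not part of this commit:

// Hypothetical extension of the `Value` type from this commit: literals are
// carried as immediates instead of being moved into a scratch register, so a
// binary op on two immediates can be folded at compile time, in one pass.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Value {
    Local(u32), // value lives in a local (register or stack slot)
    Temp(u8),   // value lives in a scratch GPR
    Imm(i32),   // literal that has not been materialised yet
}

// Fold `i32.add` when both operands are immediates; otherwise report that
// real code has to be emitted (the backend would then materialise operands).
fn fold_i32_add(a: Value, b: Value) -> Option<Value> {
    match (a, b) {
        (Value::Imm(x), Value::Imm(y)) => Some(Value::Imm(x.wrapping_add(y))),
        _ => None,
    }
}

fn main() {
    // `(i32.add (i32.const 2) (i32.const 40))` folds without emitting code.
    assert_eq!(fold_i32_add(Value::Imm(2), Value::Imm(40)), Some(Value::Imm(42)));
    // A local operand cannot be folded; the compiler would emit an `add` here.
    assert_eq!(fold_i32_add(Value::Local(0), Value::Imm(40)), None);
}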

View File

@@ -10,6 +10,7 @@ const WORD_SIZE: u32 = 8;
type GPR = u8;

#[derive(Copy, Clone)]
struct GPRs {
    bits: u16,
}
@@ -36,13 +37,19 @@ const R12: u8 = 12;
const R13: u8 = 13;
const R14: u8 = 14;
const R15: u8 = 15;
const NUM_GPRS: u8 = 16;

impl GPRs {
    fn take(&mut self) -> GPR {
        let lz = self.bits.trailing_zeros();
        assert!(lz < 16, "ran out of free GPRs");
        let gpr = lz as GPR;
        self.mark_used(gpr);
        gpr
    }

    fn mark_used(&mut self, gpr: GPR) {
        self.bits &= !(1 << gpr as u16);
    }

    fn release(&mut self, gpr: GPR) {
@@ -50,62 +57,80 @@ impl GPRs {
        self.bits |= 1 << gpr;
    }

    fn free_count(&self) -> u32 {
        self.bits.count_ones()
    }

    fn is_free(&self, gpr: GPR) -> bool {
        (self.bits & (1 << gpr)) != 0
    }
}

#[derive(Copy, Clone)]
pub struct Registers {
    scratch: GPRs,
}

impl Default for Registers {
    fn default() -> Self {
        Self::new()
    }
}

impl Registers {
    pub fn new() -> Self {
        let mut result = Self {
            scratch: GPRs::new(),
        };
        // Give ourselves a few scratch registers to work with, for now.
        for &scratch in SCRATCH_REGS {
            result.release_scratch_gpr(scratch);
        }
        result
    }

    // TODO: Add function that takes a scratch register if possible
    //       but otherwise gives a fresh stack location.
    pub fn take_scratch_gpr(&mut self) -> GPR {
        self.scratch.take()
    }

    pub fn release_scratch_gpr(&mut self, gpr: GPR) {
        self.scratch.release(gpr);
    }

    pub fn is_free(&self, gpr: GPR) -> bool {
        self.scratch.is_free(gpr)
    }

    pub fn free_scratch(&self) -> u32 {
        self.scratch.free_count()
    }
}
/// Describes the location of a value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum ValueLocation {
    /// Value exists in a register.
    Reg(GPR),
    /// Value exists on the stack. This is an offset relative to the
    /// first local, and so will have to be adjusted with `adjusted_offset`
    /// before reading (as RSP may have been changed by `push`/`pop`).
    Stack(i32),
}

// TODO: This assumes only the System V calling convention.
// In the System V calling convention the first 6 arguments are passed via registers;
// all remaining arguments are passed on the stack.
const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];

// RAX is reserved for return values. In the future we want a system to allow
// use of specific registers by saving/restoring them. This would allow using
// RAX as a scratch register when we're not calling a function, and would also
// allow us to call instructions that require specific registers.
//
// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
const SCRATCH_REGS: &[GPR] = &[R10, R11];
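
As a quick standalone illustration of the `GPRs` bitmask above (a sketch that mirrors the committed code; the `FreeGprs` name and the `main` driver are just for the demo):

// Standalone model of `GPRs`: each set bit marks a free general-purpose register.
#[derive(Copy, Clone)]
struct FreeGprs {
    bits: u16,
}

impl FreeGprs {
    fn new() -> Self {
        FreeGprs { bits: 0 }
    }
    fn take(&mut self) -> u8 {
        // Lowest set bit = lowest-numbered free register.
        let lz = self.bits.trailing_zeros();
        assert!(lz < 16, "ran out of free GPRs");
        self.bits &= !(1 << lz); // mark used
        lz as u8
    }
    fn release(&mut self, gpr: u8) {
        self.bits |= 1 << gpr;
    }
    fn free_count(&self) -> u32 {
        self.bits.count_ones()
    }
}

fn main() {
    const R10: u8 = 10;
    const R11: u8 = 11;
    let mut regs = FreeGprs::new();
    // Mirror `Registers::new()`: only the scratch registers start out free.
    regs.release(R10);
    regs.release(R11);
    assert_eq!(regs.free_count(), 2);
    assert_eq!(regs.take(), R10); // lowest-numbered free register is handed out first
    assert_eq!(regs.take(), R11);
    assert_eq!(regs.free_count(), 0);
    regs.release(R10);
    assert_eq!(regs.take(), R10);
}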
pub struct CodeGenSession {
    assembler: Assembler,
@@ -138,8 +163,8 @@ impl CodeGenSession {
        Context {
            asm: &mut self.assembler,
            func_starts: &self.func_starts,
            block_state: Default::default(),
            locals: Default::default(),
        }
    }
@@ -177,14 +202,78 @@ impl TranslatedCodeSection {
    }
}

// TODO: Immediates? We could implement on-the-fly const folding
#[derive(Copy, Clone)]
enum Value {
    Local(u32),
    Temp(GPR),
}

impl Value {
    fn location(&self, locals: &Locals) -> ValueLocation {
        match *self {
            Value::Local(loc) => local_location(locals, loc),
            Value::Temp(reg) => ValueLocation::Reg(reg),
        }
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum StackValue {
    Local(u32),
    Temp(GPR),
    Pop,
}

impl StackValue {
    fn location(&self, locals: &Locals) -> Option<ValueLocation> {
        match *self {
            StackValue::Local(loc) => Some(local_location(locals, loc)),
            StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
            StackValue::Pop => None,
        }
    }
}

#[derive(Default)]
struct Locals {
    // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
    locs: Vec<ValueLocation>,
}

#[derive(Default, Clone)]
pub struct BlockState {
    stack: Stack,
    depth: StackDepth,
    regs: Registers,
}

fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
    (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
}

fn local_location(locals: &Locals, index: u32) -> ValueLocation {
    locals
        .locs
        .get(index as usize)
        .cloned()
        .unwrap_or(ValueLocation::Stack(
            (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
        ))
}

type Stack = Vec<StackValue>;

pub struct Context<'a> {
    asm: &'a mut Assembler,
    func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
    /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
    block_state: BlockState,
    locals: Locals,
}

impl<'a> Context<'a> {}

/// Label in code.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Label(DynamicLabel);
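
The interaction between `ValueLocation::Stack` offsets and the current push depth is easy to get wrong, so here is a standalone model of `adjusted_offset` and the `local_location` fallback using plain integers (a sketch; the real functions read the same numbers out of `Context` and `Locals`):

const WORD_SIZE: u32 = 8;
const NUM_REG_ARGS: u32 = 6; // ARGS_IN_GPRS.len() in the real code

// Offsets stored in `ValueLocation::Stack` are relative to the first local.
// Before emitting `[rsp + offset]` they must be corrected by how many words
// have been pushed since the frame was set up.
fn adjusted_offset(push_depth: u32, offset: i32) -> i32 {
    (push_depth * WORD_SIZE) as i32 + offset
}

// Fallback used when a local has no explicit entry in `Locals::locs`:
// locals past the register arguments live in consecutive stack slots.
fn default_local_offset(index: u32) -> i32 {
    (index.saturating_sub(NUM_REG_ARGS) * WORD_SIZE) as i32
}

fn main() {
    // Local 8 (the third stack-allocated local) sits two words up from the first local.
    assert_eq!(default_local_offset(8), 16);
    // With nothing pushed the adjusted offset is unchanged...
    assert_eq!(adjusted_offset(0, 16), 16);
    // ...but after two pushes RSP has moved down 16 bytes, so the same slot is
    // now 32 bytes above RSP.
    assert_eq!(adjusted_offset(2, 16), 32);
}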
@@ -203,7 +292,7 @@ pub fn define_label(ctx: &mut Context, label: Label) {
}

/// Offset from starting value of SP counted in words.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct StackDepth(u32);

impl StackDepth {
@@ -216,146 +305,298 @@ impl StackDepth {
    }
}

pub fn current_block_state(ctx: &Context) -> BlockState {
    ctx.block_state.clone()
}

pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
    ctx.block_state = block_state;
}

pub fn push_return_value(ctx: &mut Context) {
    ctx.block_state.stack.push(StackValue::Temp(RAX));
}

fn push_i32(ctx: &mut Context, value: Value) {
    let stack_loc = match value {
        Value::Local(loc) => StackValue::Local(loc),
        Value::Temp(gpr) => {
            if ctx.block_state.regs.free_scratch() >= 1 {
                StackValue::Temp(gpr)
            } else {
                ctx.block_state.depth.reserve(1);
                dynasm!(ctx.asm
                    ; push Rq(gpr)
                );
                ctx.block_state.regs.release_scratch_gpr(gpr);
                StackValue::Pop
            }
        }
    };
    ctx.block_state.stack.push(stack_loc);
}
fn pop_i32(ctx: &mut Context) -> Value {
    match ctx.block_state.stack.pop().expect("Stack is empty") {
        StackValue::Local(loc) => Value::Local(loc),
        StackValue::Temp(reg) => Value::Temp(reg),
        StackValue::Pop => {
            ctx.block_state.depth.free(1);
            let gpr = ctx.block_state.regs.take_scratch_gpr();
            dynasm!(ctx.asm
                ; pop Rq(gpr)
            );
            Value::Temp(gpr)
        }
    }
}

fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
    let val = pop_i32(ctx);
    let val_loc = val.location(&ctx.locals);
    copy_value(ctx, val_loc, dst);
    free_val(ctx, val);
}

fn free_val(ctx: &mut Context, val: Value) {
    match val {
        Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg),
        Value::Local(_) => {}
    }
}
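
To make the lazy behaviour of `push_i32`/`pop_i32` concrete: a value only costs a real `push`/`pop` when the scratch registers run out. Below is a simplified, self-contained model in which `emit` just records instruction text (a mock for illustration; the real code emits machine code through `dynasm!` and picks scratch registers through `Registers`):

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum StackValue {
    Local(u32), // no code emitted; the local is read when the value is consumed
    Temp(u8),   // value already lives in a scratch register
    Pop,        // value was spilled with a real `push`; a `pop` will recover it
}

struct MockCtx {
    stack: Vec<StackValue>,
    free_scratch: u32,
    emitted: Vec<String>,
}

impl MockCtx {
    fn push_temp(&mut self, gpr: u8) {
        if self.free_scratch >= 1 {
            // Enough registers left: keep the value where it is, emit nothing.
            self.stack.push(StackValue::Temp(gpr));
        } else {
            // Out of registers: spill to the machine stack.
            self.emitted.push(format!("push r{}", gpr));
            self.stack.push(StackValue::Pop);
        }
    }
    fn pop(&mut self) -> StackValue {
        match self.stack.pop().expect("stack is empty") {
            StackValue::Pop => {
                self.emitted.push("pop <scratch>".to_string());
                StackValue::Temp(0) // scratch register chosen by the allocator
            }
            other => other,
        }
    }
}

fn main() {
    let mut ctx = MockCtx { stack: vec![], free_scratch: 1, emitted: vec![] };
    ctx.stack.push(StackValue::Local(0)); // `get_local 0` emits no code at all
    ctx.push_temp(10);                    // fits in a register: still no code
    ctx.free_scratch = 0;
    ctx.push_temp(11);                    // no registers left: real push
    assert_eq!(ctx.emitted, vec!["push r11".to_string()]);
    ctx.pop();                            // the spilled value needs a real pop
    assert_eq!(ctx.emitted.last().unwrap(), "pop <scratch>");
    assert_eq!(ctx.pop(), StackValue::Temp(10)); // register value: no code
}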
/// Puts this value into a register so that it can be efficiently read
fn into_reg(ctx: &mut Context, val: Value) -> GPR {
    match val.location(&ctx.locals) {
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            let scratch = ctx.block_state.regs.take_scratch_gpr();
            dynasm!(ctx.asm
                ; mov Rq(scratch), [rsp + offset]
            );
            scratch
        }
        ValueLocation::Reg(reg) => reg,
    }
}

/// Puts this value into a temporary register so that operations
/// on that register don't write to a local.
fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
    match val {
        Value::Local(loc) => {
            let scratch = ctx.block_state.regs.take_scratch_gpr();
            match local_location(&ctx.locals, loc) {
                ValueLocation::Stack(offset) => {
                    let offset = adjusted_offset(ctx, offset);
                    dynasm!(ctx.asm
                        ; mov Rq(scratch), [rsp + offset]
                    );
                }
                ValueLocation::Reg(reg) => {
                    dynasm!(ctx.asm
                        ; mov Rq(scratch), Rq(reg)
                    );
                }
            }
            scratch
        }
        Value::Temp(reg) => reg,
    }
}

// TODO: For the commutative instructions we can do operands in either
//       order, so we can choose the operand order that creates the
//       least unnecessary temps.
pub fn i32_add(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; add Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; add Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}

pub fn i32_sub(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; sub Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; sub Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}
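
The TODO above about commutative operand order could be as simple as preferring whichever operand is already a temporary register as the destination, so no extra scratch register has to be allocated for it. A hypothetical helper along those lines (not part of this commit; `Value` here is a cut-down stand-in for the enum defined above):

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Value {
    Local(u32),
    Temp(u8),
}

// For a commutative op (add/and/or/xor/mul) the operands can be swapped, so
// prefer whichever one is already a scratch register as the destination.
fn order_commutative(a: Value, b: Value) -> (Value, Value) {
    match (a, b) {
        // `a` is already a temp: keep the order, `a` becomes the destination.
        (Value::Temp(_), _) => (a, b),
        // Only `b` is a temp: swap so we don't have to copy a local into a
        // fresh scratch register first.
        (_, Value::Temp(_)) => (b, a),
        // Neither is a temp: one of them will have to be materialised anyway.
        _ => (a, b),
    }
}

fn main() {
    assert_eq!(
        order_commutative(Value::Local(3), Value::Temp(10)),
        (Value::Temp(10), Value::Local(3))
    );
    assert_eq!(
        order_commutative(Value::Temp(11), Value::Local(3)),
        (Value::Temp(11), Value::Local(3))
    );
}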
pub fn i32_and(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; and Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; and Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}

pub fn i32_or(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; or Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; or Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}
pub fn i32_xor(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; xor Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; xor Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}

pub fn i32_mul(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let tmp = pop_i32(ctx);
    let op1 = into_temp_reg(ctx, tmp);
    match op0.location(&ctx.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; imul Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; imul Rd(op1), [rsp + offset]
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_val(ctx, op0);
}
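
The six binary operations above differ only in the mnemonic they emit, so they are natural candidates for a macro. A sketch of how such a template might look, using a mock emitter instead of `dynasm!` so it stays self-contained (illustration only; the real functions would also keep the register-vs-stack operand handling shown above):

struct MockCtx {
    emitted: Vec<String>,
}

// Generate one wrapper per instruction; each body differs only in the mnemonic.
macro_rules! commutative_binop_i32 {
    ($name:ident, $mnemonic:expr) => {
        fn $name(ctx: &mut MockCtx, dst: u8, src: u8) {
            ctx.emitted.push(format!("{} r{}, r{}", $mnemonic, dst, src));
        }
    };
}

commutative_binop_i32!(i32_add, "add");
commutative_binop_i32!(i32_and, "and");
commutative_binop_i32!(i32_mul, "imul");

fn main() {
    let mut ctx = MockCtx { emitted: vec![] };
    i32_add(&mut ctx, 10, 11);
    i32_mul(&mut ctx, 10, 11);
    i32_and(&mut ctx, 10, 11);
    assert_eq!(ctx.emitted[0], "add r10, r11");
    assert_eq!(ctx.emitted[1], "imul r10, r11");
    assert_eq!(ctx.emitted[2], "and r10, r11");
}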
pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
    push_i32(ctx, Value::Local(local_idx));
}

// TODO: We can put locals that were spilled to the stack
//       back into registers here.
pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
    let val = pop_i32(ctx);
    let val_loc = val.location(&ctx.locals);
    let dst_loc = local_location(&ctx.locals, local_idx);
    copy_value(ctx, val_loc, dst_loc);
    free_val(ctx, val);
}

// TODO: Don't store literals at all, roll them into `Value`
pub fn literal_i32(ctx: &mut Context, imm: i32) {
    let gpr = ctx.block_state.regs.take_scratch_gpr();
    dynasm!(ctx.asm
        ; mov Rd(gpr), imm
    );
    push_i32(ctx, Value::Temp(gpr));
}
pub fn relop_eq_i32(ctx: &mut Context) {
    let right = pop_i32(ctx);
    let left = pop_i32(ctx);
    let result = ctx.block_state.regs.take_scratch_gpr();
    let lreg = into_reg(ctx, left);
    match right.location(&ctx.locals) {
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; xor Rq(result), Rq(result)
                ; cmp Rd(lreg), [rsp + offset]
                ; sete Rb(result)
            );
        }
        ValueLocation::Reg(rreg) => {
            dynasm!(ctx.asm
                ; xor Rq(result), Rq(result)
                ; cmp Rd(lreg), Rd(rreg)
                ; sete Rb(result)
            );
        }
    }
    push_i32(ctx, Value::Temp(result));
    free_val(ctx, left);
    free_val(ctx, right);
}

/// Pops i32 predicate and branches to the specified label
/// if the predicate is equal to zero.
pub fn pop_and_breq(ctx: &mut Context, label: Label) {
    let val = pop_i32(ctx);
    let predicate = into_temp_reg(ctx, val);
    dynasm!(ctx.asm
        ; test Rd(predicate), Rd(predicate)
        ; je =>label.0
    );
    ctx.block_state.regs.release_scratch_gpr(predicate);
}

/// Branch unconditionally to the specified label.
@@ -366,122 +607,246 @@ pub fn br(ctx: &mut Context, label: Label) {
}

pub fn prepare_return_value(ctx: &mut Context) {
    pop_i32_into(ctx, ValueLocation::Reg(RAX));
}
fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
    match (src, dst) {
        (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => {
            let in_offset = adjusted_offset(ctx, in_offset);
            let out_offset = adjusted_offset(ctx, out_offset);
            if in_offset != out_offset {
                let gpr = ctx.block_state.regs.take_scratch_gpr();
                dynasm!(ctx.asm
                    ; mov Rq(gpr), [rsp + in_offset]
                    ; mov [rsp + out_offset], Rq(gpr)
                );
                ctx.block_state.regs.release_scratch_gpr(gpr);
            }
        }
        (ValueLocation::Reg(in_reg), ValueLocation::Stack(out_offset)) => {
            let out_offset = adjusted_offset(ctx, out_offset);
            dynasm!(ctx.asm
                ; mov [rsp + out_offset], Rq(in_reg)
            );
        }
        (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => {
            let in_offset = adjusted_offset(ctx, in_offset);
            dynasm!(ctx.asm
                ; mov Rq(out_reg), [rsp + in_offset]
            );
        }
        (ValueLocation::Reg(in_reg), ValueLocation::Reg(out_reg)) => {
            if in_reg != out_reg {
                dynasm!(ctx.asm
                    ; mov Rq(out_reg), Rq(in_reg)
                );
            }
        }
    }
}
#[must_use]
pub struct CallCleanup {
    restore_registers: Vec<GPR>,
    stack_depth: i32,
}
/// Make sure that any argument registers that will be used by the call are free
/// by storing them to the stack.
///
/// Unfortunately, we can't elide this store if we're just passing arguments on
/// because these registers are caller-saved and so the callee can use them as
/// scratch space.
fn free_arg_registers(ctx: &mut Context, count: u32) {
    if count == 0 {
        return;
    }

    for i in 0..ctx.locals.locs.len() {
        match ctx.locals.locs[i] {
            ValueLocation::Reg(reg) => {
                if ARGS_IN_GPRS.contains(&reg) {
                    let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
                    dynasm!(ctx.asm
                        ; mov [rsp + offset], Rq(reg)
                    );
                    ctx.locals.locs[i] = ValueLocation::Stack(offset);
                }
            }
            _ => {}
        }
    }
}
fn free_return_register(ctx: &mut Context, count: u32) {
    if count == 0 {
        return;
    }

    for stack_val in &mut ctx.block_state.stack {
        match stack_val.location(&ctx.locals) {
            // For now it's impossible for a local to be in RAX but that might be
            // possible in the future, so we check both cases.
            Some(ValueLocation::Reg(RAX)) => {
                let scratch = ctx.block_state.regs.take_scratch_gpr();
                dynasm!(ctx.asm
                    ; mov Rq(scratch), rax
                );
                *stack_val = StackValue::Temp(scratch);
            }
            _ => {}
        }
    }
}
// TODO: Use `ArrayVec`?
/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
    let mut out = vec![];

    // TODO: If there are no `StackValue::Pop`s that need to be popped
    //       before we reach our `Temp` value, we can set the `StackValue`
    //       for the register to be restored to `StackValue::Pop` (and
    //       release the register!) instead of restoring it.
    for &reg in SCRATCH_REGS.iter() {
        if !ctx.block_state.regs.is_free(reg) {
            dynasm!(ctx.asm
                ; push Rq(reg)
            );
            out.push(reg);
        }
    }

    out
}
/// Write the arguments to the callee to the registers and the stack using the System V
/// calling convention.
fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
    let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
    let out = CallCleanup {
        stack_depth: num_stack_args,
        restore_registers: save_volatile(ctx),
    };

    // We pop stack arguments first - arguments are passed right-to-left.
    if num_stack_args > 0 {
        let size = num_stack_args * WORD_SIZE as i32;

        // Reserve space for the outgoing stack arguments (so we don't
        // stomp on any locals or the value stack).
        dynasm!(ctx.asm
            ; sub rsp, size
        );
        ctx.block_state.depth.reserve(num_stack_args as u32);

        for stack_slot in (0..num_stack_args).rev() {
            // Since the stack offset is from the bottom of the locals
            // and we want to start from the actual RSP (so `offset = 0`
            // writes to `[rsp]`), we subtract our current depth.
            //
            // We might want to do this in the future by having a separate
            // `AbsoluteValueLocation` and `RelativeValueLocation`.
            let offset =
                stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32;
            pop_i32_into(ctx, ValueLocation::Stack(offset));
        }
    }

    for reg in ARGS_IN_GPRS[..(arity as usize).min(ARGS_IN_GPRS.len())]
        .iter()
        .rev()
    {
        pop_i32_into(ctx, ValueLocation::Reg(*reg));
    }

    out
}
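
For reference, the argument placement that `pass_outgoing_args` implements is the usual System V mapping: the first six integer arguments go in RDI, RSI, RDX, RCX, R8 and R9, and everything after that goes on the stack. A standalone sketch of that mapping (the `ArgLocation` type and register names here are just for the demo; stack slots are outgoing slot indices, not adjusted RSP offsets):

#[derive(Debug, PartialEq, Eq)]
enum ArgLocation {
    Reg(&'static str),
    StackSlot(u32), // index of the outgoing stack slot, 0 = [rsp]
}

const ARGS_IN_GPRS: &[&str] = &["rdi", "rsi", "rdx", "rcx", "r8", "r9"];

fn outgoing_arg_location(pos: u32) -> ArgLocation {
    match ARGS_IN_GPRS.get(pos as usize) {
        Some(&reg) => ArgLocation::Reg(reg),
        None => ArgLocation::StackSlot(pos - ARGS_IN_GPRS.len() as u32),
    }
}

fn main() {
    assert_eq!(outgoing_arg_location(0), ArgLocation::Reg("rdi"));
    assert_eq!(outgoing_arg_location(5), ArgLocation::Reg("r9"));
    // Argument 6 is the first stack argument and lands in the slot at [rsp].
    assert_eq!(outgoing_arg_location(6), ArgLocation::StackSlot(0));
    assert_eq!(outgoing_arg_location(11), ArgLocation::StackSlot(5));
}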
/// Frees up the stack space used for stack-passed arguments and restores the value
/// of volatile (i.e. caller-saved) registers to the state that they were in before
/// the call.
fn post_call_cleanup(ctx: &mut Context, mut cleanup: CallCleanup) {
    if cleanup.stack_depth > 0 {
        let size = cleanup.stack_depth * WORD_SIZE as i32;
        dynasm!(ctx.asm
            ; add rsp, size
        );
    }

    for reg in cleanup.restore_registers.drain(..).rev() {
        dynasm!(ctx.asm
            ; pop Rq(reg)
        );
    }
}
/// Call a function with the given index
pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) {
    assert!(
        return_arity == 0 || return_arity == 1,
        "We don't support multiple return yet"
    );

    free_arg_registers(ctx, arg_arity);
    free_return_register(ctx, return_arity);

    let cleanup = pass_outgoing_args(ctx, arg_arity);

    let label = &ctx.func_starts[index as usize].1;
    dynasm!(ctx.asm
        ; call =>*label
    );

    post_call_cleanup(ctx, cleanup);
}
// TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them
//       as scratch registers
// TODO: Allow use of unused argument registers as scratch registers.
/// Writes the function prologue and stores the arguments as locals
pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
    let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];

    // We need space to store the register arguments if we need to call a function
    // and overwrite these registers, so we add `reg_args.len()`.
    let locals = locals + reg_args.len() as u32;
    // Align stack slots to the nearest even number. This is required
    // by the x86-64 ABI.
    let aligned_stack_slots = (locals + 1) & !1;
    let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;

    ctx.locals.locs = reg_args
        .iter()
        .cloned()
        .map(ValueLocation::Reg)
        .chain(
            (0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
                // We add 2 here because 1 stack slot is used for the saved frame pointer and
                // another is used for the return address. It's a magic number but there's not
                // really a way around this.
                .map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
        )
        .collect();

    dynasm!(ctx.asm
        ; push rbp
        ; mov rbp, rsp
    );
    if framesize > 0 {
        dynasm!(ctx.asm
            ; sub rsp, framesize
        );
    }
}
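
The arithmetic in `start_function` deserves a worked example: register arguments get backing slots so they can be spilled before a call, the slot count is rounded up to an even number to keep the stack 16-byte aligned, and stack-passed arguments are found past the frame, the saved RBP and the return address (the `+ 2` words). A standalone sketch of just that arithmetic, assuming `WORD_SIZE = 8` as above:

const WORD_SIZE: u32 = 8;
const NUM_REG_ARGS: u32 = 6;

// Frame size: one slot per local (including the register arguments, which may
// need to be spilled before a call), rounded up to an even number of slots to
// keep RSP 16-byte aligned at call sites.
fn frame_size(arguments: u32, locals: u32) -> i32 {
    let locals = locals + arguments.min(NUM_REG_ARGS);
    let aligned_slots = (locals + 1) & !1;
    (aligned_slots * WORD_SIZE) as i32
}

// Stack-passed argument `arg_i` (0-based among the stack arguments) lives above
// the frame, the saved RBP and the return address: hence the `+ 2` words.
fn incoming_stack_arg_offset(arg_i: u32, framesize: i32) -> i32 {
    ((arg_i + 2) * WORD_SIZE) as i32 + framesize
}

fn main() {
    // 12 arguments, 0 extra locals -> 6 register-arg slots -> already even.
    let framesize = frame_size(12, 0);
    assert_eq!(framesize, 48);
    // Argument 6 (the first stack argument) is read from [rsp + 48 + 16] after
    // the prologue, i.e. just past the frame, saved RBP and return address.
    assert_eq!(incoming_stack_arg_offset(0, framesize), 64);
    // 7 slots round up to 8.
    assert_eq!(frame_size(0, 7), 64);
}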
/// Writes the function epilogue, restoring the stack pointer and returning to the
/// caller.
pub fn epilogue(ctx: &mut Context) {
    // We don't need to clean up the stack - RSP is restored and
    // the calling function has its own register stack and will
    // stomp on the registers from our stack if necessary.
    dynasm!(ctx.asm

View File

@@ -56,31 +56,22 @@ struct ControlFrame {
    /// becomes polymorphic only after an instruction that never passes control further is executed,
    /// i.e. `unreachable`, `br` (but not `br_if`!), etc.
    stack_polymorphic: bool,
    /// State specific to the block (free temp registers, stack, etc.) which should be replaced
    /// at the end of the block.
    block_state: BlockState,
    ty: Type,
}

impl ControlFrame {
    pub fn new(kind: ControlFrameKind, block_state: BlockState, ty: Type) -> ControlFrame {
        ControlFrame {
            kind,
            block_state,
            ty,
            stack_polymorphic: false,
        }
    }

    /// Marks this control frame as having reached a stack-polymorphic state.
    pub fn mark_stack_polymorphic(&mut self) {
        self.stack_polymorphic = true;
@@ -103,20 +94,16 @@ pub fn translate(
        Type::EmptyBlockType
    };

    let mut num_locals = 0;
    for local in locals {
        let (count, _ty) = local?;
        num_locals += count;
    }

    let mut ctx = session.new_context(func_idx);
    let operators = body.get_operators_reader()?;

    start_function(&mut ctx, arg_count, num_locals);

    let mut control_frames = Vec::new();
@@ -127,7 +114,7 @@ pub fn translate(
        ControlFrameKind::Block {
            end_label: epilogue_label,
        },
        current_block_state(&ctx),
        return_ty,
    ));
@@ -148,7 +135,7 @@ pub fn translate(
                control_frames.push(ControlFrame::new(
                    ControlFrameKind::IfTrue { end_label, if_not },
                    current_block_state(&ctx),
                    ty,
                ));
            }
@@ -157,7 +144,7 @@ pub fn translate(
                Some(ControlFrame {
                    kind: ControlFrameKind::IfTrue { if_not, end_label },
                    ty,
                    block_state,
                    ..
                }) => {
                    // Finalize if..else block by jumping to the `end_label`.
@@ -167,7 +154,7 @@ pub fn translate(
                    // 0 it will branch here.
                    // After that reset stack depth to the value before entering `if` block.
                    define_label(&mut ctx, if_not);
                    restore_block_state(&mut ctx, block_state.clone());

                    // Carry over the `end_label`, so it will be resolved when the corresponding `end`
@@ -175,7 +162,7 @@ pub fn translate(
                    // Also note that we reset `stack_depth` to the value before entering `if` block.
                    let mut frame = ControlFrame::new(
                        ControlFrameKind::IfFalse { end_label },
                        block_state,
                        ty,
                    );
                    control_frames.push(frame);
@@ -199,14 +186,12 @@ pub fn translate(
                    define_label(&mut ctx, if_not);
                }

                // This is the last control frame. Perform the implicit return here.
                if control_frames.len() == 0 && return_ty != Type::EmptyBlockType {
                    prepare_return_value(&mut ctx);
                }

                // restore_block_state(&mut ctx, control_frame.block_state);
            }
            Operator::I32Eq => relop_eq_i32(&mut ctx),
            Operator::I32Add => i32_add(&mut ctx),
@@ -228,6 +213,7 @@ pub fn translate(
                    callee_ty.params.len() as u32,
                    callee_ty.returns.len() as u32,
                );
                push_return_value(&mut ctx);
            }
            _ => {
                trap(&mut ctx);
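
The driver-side change in this file is that each `ControlFrame` now snapshots the whole `BlockState` (value stack, push depth and register allocator) instead of just a stack depth, and `restore_block_state` puts the snapshot back when control merges at `else`/`end`. A toy model of that save/restore discipline (a sketch; the real `BlockState` is the struct defined in the backend file above):

#[derive(Clone, Default, Debug, PartialEq, Eq)]
struct BlockState {
    stack: Vec<u32>, // stand-in for the value stack
    depth: u32,      // stand-in for the push depth / register state
}

struct ControlFrame {
    block_state: BlockState, // snapshot taken on entry to the block
}

fn main() {
    let mut current = BlockState::default();
    let mut control_frames: Vec<ControlFrame> = Vec::new();

    // Entering an `if`: snapshot the state so the `else` arm starts from the
    // same place as the `then` arm.
    control_frames.push(ControlFrame { block_state: current.clone() });

    // The `then` arm pushes a value and spills something.
    current.stack.push(42);
    current.depth += 1;

    // Reaching `else`: throw away the arm-local state and restore the snapshot.
    let frame = control_frames.pop().unwrap();
    current = frame.block_state.clone();
    assert_eq!(current, BlockState::default());
}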

View File

@@ -9,8 +9,10 @@ extern crate wasmparser;
#[macro_use]
extern crate failure_derive;
extern crate dynasmrt;
#[cfg(test)]
#[macro_use]
extern crate lazy_static;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
extern crate wabt;

View File

@@ -201,7 +201,9 @@ fn function_read_args_spill_to_stack() {
    assert_eq!(
        {
            let translated = translate_wat(code);
            let out: u32 = unsafe {
                translated.execute_func(0, (7u32, 6u32, 5u32, 4u32, 3u32, 2u32, 1u32, 0u32))
            };
            out
        },
        7
@@ -213,6 +215,7 @@ fn function_write_args_spill_to_stack() {
    let code = r#"
(module
  (func (param i32) (param i32) (param i32) (param i32)
        (param i32) (param i32) (param i32) (param i32)
        (param i32) (param i32) (param i32) (param i32)
        (result i32)
@@ -225,16 +228,21 @@ fn function_write_args_spill_to_stack() {
      (get_local 5)
      (get_local 6)
      (get_local 7)
      (get_local 8)
      (get_local 9)
      (get_local 10)
      (get_local 11)
    )
  )
  (func $called
        (param i32) (param i32) (param i32) (param i32)
        (param i32) (param i32) (param i32) (param i32)
        (param i32) (param i32) (param i32) (param i32)
        (result i32)
    (call $assert_zero
      (get_local 11)
    )
    (get_local 0)
  )
@@ -251,10 +259,10 @@ fn function_write_args_spill_to_stack() {
    assert_eq!(
        {
            let translated = translate_wat(code);
            let out: u32 =
                unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) };
            out
        },
        11
    );
}

#[test]