Register allocation V2
This lays the groundwork for other on-the-fly optimisations, such as passing literals through symbolically so that constant folding can be done in linear time, while compiling.
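As a rough illustration of that follow-up idea (not part of this commit), here is a minimal sketch assuming a hypothetical `Immediate` variant on the `Value` enum introduced below — the `// TODO: Immediates?` note in the diff marks where it would go:

// Sketch only: this commit's `Value` enum has just `Local` and `Temp`;
// `Immediate` is a hypothetical extension that keeps literals symbolic
// on the value stack instead of moving them into a register.
type GPR = u8;

#[derive(Copy, Clone)]
enum Value {
    Local(u32),
    Temp(GPR),
    Immediate(i32), // hypothetical, not in this commit
}

// A binop could then fold two literals at compile time and push the
// result back, so constant expressions never touch a register. Each
// stack entry is visited once, keeping compilation linear-time.
fn i32_add_folding(stack: &mut Vec<Value>) {
    match (stack.pop().unwrap(), stack.pop().unwrap()) {
        (Value::Immediate(a), Value::Immediate(b)) => {
            stack.push(Value::Immediate(a.wrapping_add(b)))
        }
        _ => unimplemented!("fall back to emitting a real `add`, as in the diff below"),
    }
}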
src/backend.rs (715 changed lines)
@@ -10,6 +10,7 @@ const WORD_SIZE: u32 = 8;
 
 type GPR = u8;
 
+#[derive(Copy, Clone)]
 struct GPRs {
     bits: u16,
 }
@@ -36,13 +37,19 @@ const R12: u8 = 12;
 const R13: u8 = 13;
 const R14: u8 = 14;
 const R15: u8 = 15;
+const NUM_GPRS: u8 = 16;
 
 impl GPRs {
     fn take(&mut self) -> GPR {
         let lz = self.bits.trailing_zeros();
-        assert!(lz < 32, "ran out of free GPRs");
-        self.bits &= !(1 << lz);
-        lz as GPR
+        assert!(lz < 16, "ran out of free GPRs");
+        let gpr = lz as GPR;
+        self.mark_used(gpr);
+        gpr
     }
 
+    fn mark_used(&mut self, gpr: GPR) {
+        self.bits &= !(1 << gpr as u16);
+    }
+
     fn release(&mut self, gpr: GPR) {
@@ -50,62 +57,80 @@ impl GPRs {
         self.bits |= 1 << gpr;
     }
 
+    fn free_count(&self) -> u32 {
+        self.bits.count_ones()
+    }
+
+    fn is_free(&self, gpr: GPR) -> bool {
+        (self.bits & (1 << gpr)) != 0
+    }
 }
 
+#[derive(Copy, Clone)]
 pub struct Registers {
-    scratch_gprs: GPRs,
+    scratch: GPRs,
 }
 
 impl Default for Registers {
     fn default() -> Self {
         Self::new()
     }
 }
 
 impl Registers {
     pub fn new() -> Self {
         let mut result = Self {
-            scratch_gprs: GPRs::new(),
+            scratch: GPRs::new(),
         };
         // Give ourselves a few scratch registers to work with, for now.
-        result.release_scratch_gpr(RAX);
-        result.release_scratch_gpr(RCX);
-        result.release_scratch_gpr(RDX);
+        for &scratch in SCRATCH_REGS {
+            result.release_scratch_gpr(scratch);
+        }
 
         result
     }
 
     // TODO: Add function that takes a scratch register if possible
    // but otherwise gives a fresh stack location.
     pub fn take_scratch_gpr(&mut self) -> GPR {
-        self.scratch_gprs.take()
+        self.scratch.take()
     }
 
     pub fn release_scratch_gpr(&mut self, gpr: GPR) {
-        self.scratch_gprs.release(gpr);
+        self.scratch.release(gpr);
     }
+
+    pub fn is_free(&self, gpr: GPR) -> bool {
+        self.scratch.is_free(gpr)
+    }
+
+    pub fn free_scratch(&self) -> u32 {
+        self.scratch.free_count()
+    }
 }
 
-/// Describes location of a argument.
-#[derive(Debug)]
-enum ArgLocation {
-    /// Argument is passed via some register.
+/// Describes location of a value.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum ValueLocation {
+    /// Value exists in a register.
     Reg(GPR),
-    /// Value is passed thru the stack.
+    /// Value exists on the stack. This is an offset relative to the
+    /// first local, and so will have to be adjusted with `adjusted_offset`
+    /// before reading (as RSP may have been changed by `push`/`pop`).
     Stack(i32),
 }
 
 // TODO: This assumes only system-v calling convention.
 // In system-v calling convention the first 6 arguments are passed via registers.
 // All rest arguments are passed on the stack.
-const ARGS_IN_GPRS: &'static [GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
-
-/// Get a location for an argument at the given position.
-fn abi_loc_for_arg(pos: u32) -> ArgLocation {
-    if let Some(&reg) = ARGS_IN_GPRS.get(pos as usize) {
-        ArgLocation::Reg(reg)
-    } else {
-        let stack_pos = pos - ARGS_IN_GPRS.len() as u32;
-        // +2 is because the first argument is located right after the saved frame pointer slot
-        // and the incoming return address.
-        let stack_offset = ((stack_pos + 2) * WORD_SIZE) as i32;
-        ArgLocation::Stack(stack_offset)
-    }
-}
+const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
+// RAX is reserved for return values. In the future we want a system to allow
+// use of specific registers by saving/restoring them. This would allow using
+// RAX as a scratch register when we're not calling a function, and would also
+// allow us to call instructions that require specific registers.
+//
+// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
+const SCRATCH_REGS: &[GPR] = &[R10, R11];
 
 pub struct CodeGenSession {
     assembler: Assembler,
@@ -138,8 +163,8 @@ impl CodeGenSession {
         Context {
             asm: &mut self.assembler,
             func_starts: &self.func_starts,
-            regs: Registers::new(),
-            sp_depth: StackDepth(0),
+            block_state: Default::default(),
+            locals: Default::default(),
         }
     }
 
@@ -177,14 +202,78 @@ impl TranslatedCodeSection {
     }
 }
 
+// TODO: Immediates? We could implement on-the-fly const folding
+#[derive(Copy, Clone)]
+enum Value {
+    Local(u32),
+    Temp(GPR),
+}
+
+impl Value {
+    fn location(&self, locals: &Locals) -> ValueLocation {
+        match *self {
+            Value::Local(loc) => local_location(locals, loc),
+            Value::Temp(reg) => ValueLocation::Reg(reg),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum StackValue {
+    Local(u32),
+    Temp(GPR),
+    Pop,
+}
+
+impl StackValue {
+    fn location(&self, locals: &Locals) -> Option<ValueLocation> {
+        match *self {
+            StackValue::Local(loc) => Some(local_location(locals, loc)),
+            StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
+            StackValue::Pop => None,
+        }
+    }
+}
+
+#[derive(Default)]
+struct Locals {
+    // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
+    locs: Vec<ValueLocation>,
+}
+
+#[derive(Default, Clone)]
+pub struct BlockState {
+    stack: Stack,
+    depth: StackDepth,
+    regs: Registers,
+}
+
+fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
+    (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
+}
+
+fn local_location(locals: &Locals, index: u32) -> ValueLocation {
+    locals
+        .locs
+        .get(index as usize)
+        .cloned()
+        .unwrap_or(ValueLocation::Stack(
+            (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
+        ))
+}
+
+type Stack = Vec<StackValue>;
+
 pub struct Context<'a> {
     asm: &'a mut Assembler,
     func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
-    regs: Registers,
-    /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
-    sp_depth: StackDepth,
+    block_state: BlockState,
+    locals: Locals,
 }
 
 impl<'a> Context<'a> {}
 
 /// Label in code.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub struct Label(DynamicLabel);
@@ -203,7 +292,7 @@ pub fn define_label(ctx: &mut Context, label: Label) {
 }
 
 /// Offset from starting value of SP counted in words.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
 pub struct StackDepth(u32);
 
 impl StackDepth {
@@ -216,146 +305,298 @@ impl StackDepth {
     }
 }
 
-pub fn current_stack_depth(ctx: &Context) -> StackDepth {
-    ctx.sp_depth
+pub fn current_block_state(ctx: &Context) -> BlockState {
+    ctx.block_state.clone()
 }
 
-pub fn restore_stack_depth(ctx: &mut Context, stack_depth: StackDepth) {
-    ctx.sp_depth = stack_depth;
+pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
+    ctx.block_state = block_state;
 }
 
-fn push_i32(ctx: &mut Context, gpr: GPR) {
-    // For now, do an actual push (and pop below). In the future, we could
-    // do on-the-fly register allocation here.
-    ctx.sp_depth.reserve(1);
+pub fn push_return_value(ctx: &mut Context) {
+    ctx.block_state.stack.push(StackValue::Temp(RAX));
+}
+
+fn push_i32(ctx: &mut Context, value: Value) {
+    let stack_loc = match value {
+        Value::Local(loc) => StackValue::Local(loc),
+        Value::Temp(gpr) => {
+            if ctx.block_state.regs.free_scratch() >= 1 {
+                StackValue::Temp(gpr)
+            } else {
+                ctx.block_state.depth.reserve(1);
                 dynasm!(ctx.asm
                     ; push Rq(gpr)
                 );
-    ctx.regs.release_scratch_gpr(gpr);
+                ctx.block_state.regs.release_scratch_gpr(gpr);
+                StackValue::Pop
+            }
+        }
+    };
+
+    ctx.block_state.stack.push(stack_loc);
 }
 
-fn pop_i32(ctx: &mut Context) -> GPR {
-    ctx.sp_depth.free(1);
-    let gpr = ctx.regs.take_scratch_gpr();
+fn pop_i32(ctx: &mut Context) -> Value {
+    match ctx.block_state.stack.pop().expect("Stack is empty") {
+        StackValue::Local(loc) => Value::Local(loc),
+        StackValue::Temp(reg) => Value::Temp(reg),
+        StackValue::Pop => {
+            ctx.block_state.depth.free(1);
+            let gpr = ctx.block_state.regs.take_scratch_gpr();
             dynasm!(ctx.asm
                 ; pop Rq(gpr)
             );
-    gpr
+            Value::Temp(gpr)
+        }
+    }
 }
 
+fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
+    let val = pop_i32(ctx);
+    let val_loc = val.location(&ctx.locals);
+    copy_value(ctx, val_loc, dst);
+    free_val(ctx, val);
+}
+
+fn free_val(ctx: &mut Context, val: Value) {
+    match val {
+        Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg),
+        Value::Local(_) => {}
+    }
+}
+
+/// Puts this value into a register so that it can be efficiently read
+fn into_reg(ctx: &mut Context, val: Value) -> GPR {
+    match val.location(&ctx.locals) {
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            let scratch = ctx.block_state.regs.take_scratch_gpr();
+            dynasm!(ctx.asm
+                ; mov Rq(scratch), [rsp + offset]
+            );
+            scratch
+        }
+        ValueLocation::Reg(reg) => reg,
+    }
+}
+
+/// Puts this value into a temporary register so that operations
+/// on that register don't write to a local.
+fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
+    match val {
+        Value::Local(loc) => {
+            let scratch = ctx.block_state.regs.take_scratch_gpr();
+
+            match local_location(&ctx.locals, loc) {
+                ValueLocation::Stack(offset) => {
+                    let offset = adjusted_offset(ctx, offset);
+                    dynasm!(ctx.asm
+                        ; mov Rq(scratch), [rsp + offset]
+                    );
+                }
+                ValueLocation::Reg(reg) => {
+                    dynasm!(ctx.asm
+                        ; mov Rq(scratch), Rq(reg)
+                    );
+                }
+            }
+
+            scratch
+        }
+        Value::Temp(reg) => reg,
+    }
+}
+
+// TODO: For the commutative instructions we can do operands in either
+// order, so we can choose the operand order that creates the
+// least unnecessary temps.
 pub fn i32_add(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; add Rd(op1), Rd(op0)
+                ; add Rd(op1), Rd(reg)
             );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; add Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
 }
 
 pub fn i32_sub(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; sub Rd(op1), Rd(op0)
+                ; sub Rd(op1), Rd(reg)
             );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; sub Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
 }
 
 pub fn i32_and(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; and Rd(op1), Rd(op0)
+                ; and Rd(op1), Rd(reg)
            );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; and Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
 }
 
 pub fn i32_or(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; or Rd(op1), Rd(op0)
+                ; or Rd(op1), Rd(reg)
             );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; or Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
 }
 
 pub fn i32_xor(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; xor Rd(op1), Rd(op0)
             );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
+                ; xor Rd(op1), Rd(reg)
+            );
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; xor Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
 }
 
 pub fn i32_mul(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let op1 = pop_i32(ctx);
+    let tmp = pop_i32(ctx);
+    let op1 = into_temp_reg(ctx, tmp);
+    match op0.location(&ctx.locals) {
+        ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
-        ; imul Rd(op1), Rd(op0)
+                ; imul Rd(op1), Rd(reg)
             );
-    push_i32(ctx, op1);
-    ctx.regs.release_scratch_gpr(op0);
         }
 
-fn sp_relative_offset(ctx: &mut Context, slot_idx: u32) -> i32 {
-    ((ctx.sp_depth.0 as i32) + slot_idx as i32) * WORD_SIZE as i32
-}
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; imul Rd(op1), [rsp + offset]
+            );
+        }
+    }
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_val(ctx, op0);
+}
 
 pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
-    let gpr = ctx.regs.take_scratch_gpr();
-    let offset = sp_relative_offset(ctx, local_idx);
-    dynasm!(ctx.asm
-        ; mov Rq(gpr), [rsp + offset]
-    );
-    push_i32(ctx, gpr);
+    push_i32(ctx, Value::Local(local_idx));
 }
 
+// TODO: We can put locals that were spilled to the stack
+// back into registers here.
 pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
-    let gpr = pop_i32(ctx);
-    let offset = sp_relative_offset(ctx, local_idx);
-    dynasm!(ctx.asm
-        ; mov [rsp + offset], Rq(gpr)
-    );
-    ctx.regs.release_scratch_gpr(gpr);
+    let val = pop_i32(ctx);
+    let val_loc = val.location(&ctx.locals);
+    let dst_loc = local_location(&ctx.locals, local_idx);
+    copy_value(ctx, val_loc, dst_loc);
+    free_val(ctx, val);
 }
 
+// TODO: Don't store literals at all, roll them into `Value`
 pub fn literal_i32(ctx: &mut Context, imm: i32) {
-    let gpr = ctx.regs.take_scratch_gpr();
+    let gpr = ctx.block_state.regs.take_scratch_gpr();
     dynasm!(ctx.asm
         ; mov Rd(gpr), imm
     );
-    push_i32(ctx, gpr);
+    push_i32(ctx, Value::Temp(gpr));
 }
 
 pub fn relop_eq_i32(ctx: &mut Context) {
     let right = pop_i32(ctx);
     let left = pop_i32(ctx);
-    let result = ctx.regs.take_scratch_gpr();
+    let result = ctx.block_state.regs.take_scratch_gpr();
+    let lreg = into_reg(ctx, left);
+    match right.location(&ctx.locals) {
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
             dynasm!(ctx.asm
                 ; xor Rq(result), Rq(result)
-        ; cmp Rd(left), Rd(right)
+                ; cmp Rd(lreg), [rsp + offset]
                 ; sete Rb(result)
             );
-    push_i32(ctx, result);
-    ctx.regs.release_scratch_gpr(left);
-    ctx.regs.release_scratch_gpr(right);
+        }
+        ValueLocation::Reg(rreg) => {
+            dynasm!(ctx.asm
+                ; xor Rq(result), Rq(result)
+                ; cmp Rd(lreg), Rd(rreg)
+                ; sete Rb(result)
+            );
+        }
+    }
+    push_i32(ctx, Value::Temp(result));
+    free_val(ctx, left);
+    free_val(ctx, right);
 }
 
 /// Pops i32 predicate and branches to the specified label
 /// if the predicate is equal to zero.
 pub fn pop_and_breq(ctx: &mut Context, label: Label) {
-    let predicate = pop_i32(ctx);
+    let val = pop_i32(ctx);
+    let predicate = into_temp_reg(ctx, val);
     dynasm!(ctx.asm
         ; test Rd(predicate), Rd(predicate)
         ; je =>label.0
     );
-    ctx.regs.release_scratch_gpr(predicate);
+    ctx.block_state.regs.release_scratch_gpr(predicate);
 }
 
 /// Branch unconditionally to the specified label.
@@ -366,122 +607,246 @@ pub fn br(ctx: &mut Context, label: Label) {
 }
 
 pub fn prepare_return_value(ctx: &mut Context) {
-    let ret_gpr = pop_i32(ctx);
-    if ret_gpr != RAX {
-        dynasm!(ctx.asm
-            ; mov Rq(RAX), Rq(ret_gpr)
-        );
-        ctx.regs.release_scratch_gpr(ret_gpr);
-    }
+    pop_i32_into(ctx, ValueLocation::Reg(RAX));
 }
 
-pub fn copy_incoming_arg(ctx: &mut Context, frame_size: u32, arg_pos: u32) {
-    let loc = abi_loc_for_arg(arg_pos);
-
-    // First, ensure the argument is in a register.
-    let reg = match loc {
-        ArgLocation::Reg(reg) => reg,
-        ArgLocation::Stack(offset) => {
-            assert!(
-                ctx.regs.scratch_gprs.is_free(RAX),
-                "we assume that RAX can be used as a scratch register for now",
-            );
-            let offset = offset + (frame_size * WORD_SIZE) as i32;
+fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
+    match (src, dst) {
+        (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => {
+            let in_offset = adjusted_offset(ctx, in_offset);
+            let out_offset = adjusted_offset(ctx, out_offset);
+            if in_offset != out_offset {
+                let gpr = ctx.block_state.regs.take_scratch_gpr();
                 dynasm!(ctx.asm
-                ; mov Rq(RAX), [rsp + offset]
+                    ; mov Rq(gpr), [rsp + in_offset]
+                    ; mov [rsp + out_offset], Rq(gpr)
                 );
-            RAX
+                ctx.block_state.regs.release_scratch_gpr(gpr);
+            }
         }
-    };
-
-    // And then move a value from a register into local variable area on the stack.
-    let offset = sp_relative_offset(ctx, arg_pos);
+        (ValueLocation::Reg(in_reg), ValueLocation::Stack(out_offset)) => {
+            let out_offset = adjusted_offset(ctx, out_offset);
             dynasm!(ctx.asm
-        ; mov [rsp + offset], Rq(reg)
+                ; mov [rsp + out_offset], Rq(in_reg)
             );
         }
+        (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => {
+            let in_offset = adjusted_offset(ctx, in_offset);
+            dynasm!(ctx.asm
+                ; mov Rq(out_reg), [rsp + in_offset]
+            );
+        }
+        (ValueLocation::Reg(in_reg), ValueLocation::Reg(out_reg)) => {
+            if in_reg != out_reg {
+                dynasm!(ctx.asm
+                    ; mov Rq(out_reg), Rq(in_reg)
+                );
+            }
+        }
+    }
+}
 
-#[must_use]
-fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> i32 {
-    let mut stack_args = Vec::with_capacity((arity as usize).saturating_sub(ARGS_IN_GPRS.len()));
-    for arg_pos in (0..arity).rev() {
-        ctx.sp_depth.free(1);
-
-        let loc = abi_loc_for_arg(arg_pos);
-        match loc {
-            ArgLocation::Reg(gpr) => {
-                dynasm!(ctx.asm
-                    ; pop Rq(gpr)
-                );
-            }
-            ArgLocation::Stack(_) => {
-                let gpr = ctx.regs.take_scratch_gpr();
-                dynasm!(ctx.asm
-                    ; pop Rq(gpr)
-                );
-                stack_args.push(gpr);
-            }
-        }
-    }
-
-    let num_stack_args = stack_args.len() as i32;
-    dynasm!(ctx.asm
-        ; sub rsp, num_stack_args
-    );
-    for (stack_slot, gpr) in stack_args.into_iter().rev().enumerate() {
-        let offset = (stack_slot * WORD_SIZE as usize) as i32;
-        dynasm!(ctx.asm
-            ; mov [rsp + offset], Rq(gpr)
-        );
-        ctx.regs.release_scratch_gpr(gpr);
-    }
-
-    num_stack_args
+pub struct CallCleanup {
+    restore_registers: Vec<GPR>,
+    stack_depth: i32,
 }
 
-fn post_call_cleanup(ctx: &mut Context, num_stack_args: i32) {
+/// Make sure that any argument registers that will be used by the call are free
+/// by storing them to the stack.
+///
+/// Unfortunately, we can't elide this store if we're just passing arguments on
+/// because these registers are caller-saved and so the callee can use them as
+/// scratch space.
+fn free_arg_registers(ctx: &mut Context, count: u32) {
+    if count == 0 {
+        return;
+    }
+
+    for i in 0..ctx.locals.locs.len() {
+        match ctx.locals.locs[i] {
+            ValueLocation::Reg(reg) => {
+                if ARGS_IN_GPRS.contains(&reg) {
+                    let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
                     dynasm!(ctx.asm
-        ; add rsp, num_stack_args
+                        ; mov [rsp + offset], Rq(reg)
                     );
+                    ctx.locals.locs[i] = ValueLocation::Stack(offset);
+                }
+            }
+            _ => {}
+        }
+    }
 }
 
+fn free_return_register(ctx: &mut Context, count: u32) {
+    if count == 0 {
+        return;
+    }
+
+    for stack_val in &mut ctx.block_state.stack {
+        match stack_val.location(&ctx.locals) {
+            // For now it's impossible for a local to be in RAX but that might be
+            // possible in the future, so we check both cases.
+            Some(ValueLocation::Reg(RAX)) => {
+                let scratch = ctx.block_state.regs.take_scratch_gpr();
+                dynasm!(ctx.asm
+                    ; mov Rq(scratch), rax
+                );
+                *stack_val = StackValue::Temp(scratch);
+            }
+            _ => {}
+        }
+    }
+}
+
+// TODO: Use `ArrayVec`?
+/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
+fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
+    let mut out = vec![];
+
+    // TODO: If there are no `StackValue::Pop`s that need to be popped
+    // before we reach our `Temp` value, we can set the `StackValue`
+    // for the register to be restored to `StackValue::Pop` (and
+    // release the register!) instead of restoring it.
+    for &reg in SCRATCH_REGS.iter() {
+        if !ctx.block_state.regs.is_free(reg) {
+            dynasm!(ctx.asm
+                ; push Rq(reg)
+            );
+            out.push(reg);
+        }
+    }
+
+    out
+}
+
+/// Write the arguments to the callee to the registers and the stack using the SystemV
+/// calling convention.
+fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
+    let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
+
+    let out = CallCleanup {
+        stack_depth: num_stack_args,
+        restore_registers: save_volatile(ctx),
+    };
+
+    // We pop stack arguments first - arguments are RTL
+    if num_stack_args > 0 {
+        let size = num_stack_args * WORD_SIZE as i32;
+
+        // Reserve space for the outgoing stack arguments (so we don't
+        // stomp on any locals or the value stack).
+        dynasm!(ctx.asm
+            ; sub rsp, size
+        );
+        ctx.block_state.depth.reserve(num_stack_args as u32);
+
+        for stack_slot in (0..num_stack_args).rev() {
+            // Since the stack offset is from the bottom of the locals
+            // and we want to start from the actual RSP (so `offset = 0`
+            // writes to `[rsp]`), we subtract our current depth.
+            //
+            // We might want to do this in the future by having a separate
+            // `AbsoluteValueLocation` and `RelativeValueLocation`.
+            let offset =
+                stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32;
+            pop_i32_into(ctx, ValueLocation::Stack(offset));
+        }
+    }
+
+    for reg in ARGS_IN_GPRS[..(arity as usize).min(ARGS_IN_GPRS.len())]
+        .iter()
+        .rev()
+    {
+        pop_i32_into(ctx, ValueLocation::Reg(*reg));
+    }
+
+    out
+}
+
+/// Frees up the stack space used for stack-passed arguments and restores the value
+/// of volatile (i.e. caller-saved) registers to the state that they were in before
+/// the call.
+fn post_call_cleanup(ctx: &mut Context, mut cleanup: CallCleanup) {
+    if cleanup.stack_depth > 0 {
+        let size = cleanup.stack_depth * WORD_SIZE as i32;
+        dynasm!(ctx.asm
+            ; add rsp, size
+        );
+    }
+
+    for reg in cleanup.restore_registers.drain(..).rev() {
+        dynasm!(ctx.asm
+            ; pop Rq(reg)
+        );
+    }
+}
+
 /// Call a function with the given index
 pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) {
-    assert!(return_arity == 0 || return_arity == 1);
+    assert!(
+        return_arity == 0 || return_arity == 1,
+        "We don't support multiple return yet"
+    );
 
-    let num_stack_args = pass_outgoing_args(ctx, arg_arity);
+    free_arg_registers(ctx, arg_arity);
+    free_return_register(ctx, return_arity);
+
+    let cleanup = pass_outgoing_args(ctx, arg_arity);
+
     let label = &ctx.func_starts[index as usize].1;
     dynasm!(ctx.asm
         ; call =>*label
     );
 
-    post_call_cleanup(ctx, num_stack_args);
-
-    if return_arity == 1 {
-        dynasm!(ctx.asm
-            ; push rax
-        );
-        ctx.sp_depth.reserve(1);
-    }
+    post_call_cleanup(ctx, cleanup);
 }
 
-pub fn prologue(ctx: &mut Context, stack_slots: u32) {
-    let stack_slots = stack_slots;
-    // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them
-    // as scratch registers
+// TODO: Allow use of unused argument registers as scratch registers.
+/// Writes the function prologue and stores the arguments as locals
+pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
+    let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
+
+    // We need space to store the register arguments if we need to call a function
+    // and overwrite these registers so we add `reg_args.len()`
+    let locals = locals + reg_args.len() as u32;
     // Align stack slots to the nearest even number. This is required
     // by x86-64 ABI.
-    let aligned_stack_slots = (stack_slots + 1) & !1;
+    let aligned_stack_slots = (locals + 1) & !1;
     let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
 
+    ctx.locals.locs = reg_args
+        .iter()
+        .cloned()
+        .map(ValueLocation::Reg)
+        .chain(
+            (0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
+                // We add 2 here because 1 stack slot is used for the stack pointer and another is
+                // used for the return address. It's a magic number but there's not really a way
+                // around this.
+                .map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
+        )
+        .collect();
+
     dynasm!(ctx.asm
         ; push rbp
         ; mov rbp, rsp
     );
 
     if framesize > 0 {
         dynasm!(ctx.asm
             ; sub rsp, framesize
         );
-        ctx.sp_depth.reserve(aligned_stack_slots - stack_slots);
     }
 }
 
 /// Writes the function epilogue, restoring the stack pointer and returning to the
 /// caller.
 pub fn epilogue(ctx: &mut Context) {
-    // We don't need to clean up the stack - `rsp` is restored and
+    // We don't need to clean up the stack - RSP is restored and
     // the calling function has its own register stack and will
     // stomp on the registers from our stack if necessary.
     dynasm!(ctx.asm
@@ -56,31 +56,22 @@ struct ControlFrame {
     /// becomes polymorphic only after an instruction that never passes control further is executed,
     /// i.e. `unreachable`, `br` (but not `br_if`!), etc.
     stack_polymorphic: bool,
-    /// Relative stack depth at the beginning of the frame.
-    stack_depth: StackDepth,
+    /// State specific to the block (free temp registers, stack etc) which should be replaced
+    /// at the end of the block
+    block_state: BlockState,
     ty: Type,
 }
 
 impl ControlFrame {
-    pub fn new(kind: ControlFrameKind, stack_depth: StackDepth, ty: Type) -> ControlFrame {
+    pub fn new(kind: ControlFrameKind, block_state: BlockState, ty: Type) -> ControlFrame {
         ControlFrame {
             kind,
-            stack_depth,
+            block_state,
             ty,
             stack_polymorphic: false,
         }
    }
 
-    pub fn outgoing_stack_depth(&self) -> StackDepth {
-        let mut outgoing_stack_depth = self.stack_depth;
-        if self.ty != Type::EmptyBlockType {
-            // If there a return value then reserve expected outgoing stack depth value
-            // to account for the result value.
-            outgoing_stack_depth.reserve(1);
-        }
-        outgoing_stack_depth
-    }
-
     /// Marks this control frame as reached stack-polymorphic state.
     pub fn mark_stack_polymorphic(&mut self) {
         self.stack_polymorphic = true;
@@ -103,20 +94,16 @@ pub fn translate(
         Type::EmptyBlockType
     };
 
-    let mut framesize = arg_count;
+    let mut num_locals = 0;
     for local in locals {
         let (count, _ty) = local?;
-        framesize += count;
+        num_locals += count;
     }
 
     let mut ctx = session.new_context(func_idx);
     let operators = body.get_operators_reader()?;
 
-    prologue(&mut ctx, framesize);
-
-    for arg_pos in 0..arg_count {
-        copy_incoming_arg(&mut ctx, framesize, arg_pos);
-    }
+    start_function(&mut ctx, arg_count, num_locals);
 
     let mut control_frames = Vec::new();
 
@@ -127,7 +114,7 @@ pub fn translate(
         ControlFrameKind::Block {
             end_label: epilogue_label,
         },
-        current_stack_depth(&ctx),
+        current_block_state(&ctx),
         return_ty,
     ));
 
@@ -148,7 +135,7 @@ pub fn translate(
 
                 control_frames.push(ControlFrame::new(
                     ControlFrameKind::IfTrue { end_label, if_not },
-                    current_stack_depth(&ctx),
+                    current_block_state(&ctx),
                     ty,
                 ));
             }
@@ -157,7 +144,7 @@ pub fn translate(
                 Some(ControlFrame {
                     kind: ControlFrameKind::IfTrue { if_not, end_label },
                     ty,
-                    stack_depth,
+                    block_state,
                     ..
                 }) => {
                     // Finalize if..else block by jumping to the `end_label`.
@@ -167,7 +154,7 @@ pub fn translate(
                     // 0 it will branch here.
                     // After that reset stack depth to the value before entering `if` block.
                     define_label(&mut ctx, if_not);
-                    restore_stack_depth(&mut ctx, stack_depth);
+                    restore_block_state(&mut ctx, block_state.clone());
 
                     // Carry over the `end_label`, so it will be resolved when the corresponding `end`
                     // is encountered.
@@ -175,7 +162,7 @@ pub fn translate(
                     // Also note that we reset `stack_depth` to the value before entering `if` block.
                     let mut frame = ControlFrame::new(
                         ControlFrameKind::IfFalse { end_label },
-                        stack_depth,
+                        block_state,
                         ty,
                     );
                     control_frames.push(frame);
@@ -199,14 +186,12 @@ pub fn translate(
                     define_label(&mut ctx, if_not);
                 }
 
-                restore_stack_depth(&mut ctx, control_frame.outgoing_stack_depth());
-
-                if control_frames.len() == 0 {
+                if control_frames.len() == 0 && return_ty != Type::EmptyBlockType {
                     // This is the last control frame. Perform the implicit return here.
-                    if return_ty != Type::EmptyBlockType {
                     prepare_return_value(&mut ctx);
-                    }
                 }
+
+                // restore_block_state(&mut ctx, control_frame.block_state);
             }
             Operator::I32Eq => relop_eq_i32(&mut ctx),
             Operator::I32Add => i32_add(&mut ctx),
@@ -228,6 +213,7 @@ pub fn translate(
                         callee_ty.params.len() as u32,
                         callee_ty.returns.len() as u32,
                     );
+                    push_return_value(&mut ctx);
                 }
                 _ => {
                     trap(&mut ctx);
@@ -9,8 +9,10 @@ extern crate wasmparser;
 #[macro_use]
 extern crate failure_derive;
 extern crate dynasmrt;
+#[cfg(test)]
+#[macro_use]
+extern crate lazy_static;
 #[cfg(test)]
 #[macro_use]
 extern crate quickcheck;
 extern crate wabt;
 
src/tests.rs (16 changed lines)
@@ -201,7 +201,9 @@ fn function_read_args_spill_to_stack() {
     assert_eq!(
         {
             let translated = translate_wat(code);
-            let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) };
+            let out: u32 = unsafe {
+                translated.execute_func(0, (7u32, 6u32, 5u32, 4u32, 3u32, 2u32, 1u32, 0u32))
+            };
             out
         },
         7
@@ -213,6 +215,7 @@ fn function_write_args_spill_to_stack() {
     let code = r#"
 (module
   (func (param i32) (param i32) (param i32) (param i32)
+        (param i32) (param i32) (param i32) (param i32)
         (param i32) (param i32) (param i32) (param i32)
         (result i32)
 
@@ -225,16 +228,21 @@ fn function_write_args_spill_to_stack() {
     (get_local 5)
     (get_local 6)
     (get_local 7)
+    (get_local 8)
+    (get_local 9)
+    (get_local 10)
+    (get_local 11)
   )
 )
 
 (func $called
   (param i32) (param i32) (param i32) (param i32)
   (param i32) (param i32) (param i32) (param i32)
+  (param i32) (param i32) (param i32) (param i32)
   (result i32)
 
   (call $assert_zero
-    (get_local 7)
+    (get_local 11)
   )
   (get_local 0)
 )
@@ -251,10 +259,10 @@ fn function_write_args_spill_to_stack() {
     assert_eq!(
         {
             let translated = translate_wat(code);
-            let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) };
+            let out: u32 = unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) };
             out
         },
-        7
+        11
     );
 }
 #[test]