Allow blocks to return values in any register

This commit is contained in:
Jef
2018-12-18 17:16:38 +01:00
parent 74ffb8560c
commit 5418241dc6
3 changed files with 104 additions and 39 deletions

View File

@@ -211,7 +211,6 @@ impl TranslatedCodeSection {
}
}
// TODO: Immediates? We could implement on-the-fly const folding
#[derive(Debug, Copy, Clone, PartialEq)]
enum Value {
Local(u32),
@@ -287,6 +286,7 @@ pub struct BlockState {
// TODO: `BitVec`
stack_map: Vec<bool>,
depth: StackDepth,
return_register: Option<GPR>,
regs: Registers,
/// This is the _current_ locals, since we can shuffle them about during function calls.
/// We will restore this to be the same state as the `Locals` in `Context` at the end
@@ -426,17 +426,43 @@ pub fn return_from_block(ctx: &mut Context, arity: u32, is_function_end: bool) {
}
let stack_top = *ctx.block_state.stack.last().expect("Stack is empty");
put_stack_val_into(ctx, stack_top, ValueLocation::Reg(RAX))
if let Some(reg) = ctx.block_state.return_register {
put_stack_val_into(ctx, stack_top, ValueLocation::Reg(reg));
} else {
let out_reg = match stack_top {
StackValue::Temp(r) => r,
other => {
let new_scratch = ctx.block_state.regs.take_scratch_gpr();
put_stack_val_into(ctx, other, ValueLocation::Reg(new_scratch));
new_scratch
}
};
ctx.block_state.return_register = Some(out_reg);
}
}
pub fn start_block(ctx: &mut Context, arity: u32) -> BlockState {
free_return_register(ctx, arity);
/// Begin a new block scope: snapshot the current `BlockState` so it can be
/// handed back to `end_block`/`reset_block` when the block terminates.
pub fn start_block(ctx: &mut Context) -> BlockState {
    // free_return_register(ctx, arity); // NOTE(review): dead remnant of the old arity-based API
    // Save the full current state; the caller keeps it and passes it back
    // to `end_block`/`reset_block` on block exit.
    let current_state = ctx.block_state.clone();
    // The child block must restore locals to this layout when it ends, so
    // record the current locals as the parent layout to restore to.
    ctx.block_state.parent_locals = ctx.block_state.locals.clone();
    // No return register is chosen yet; `return_from_block` picks one
    // lazily the first time this block produces a value.
    ctx.block_state.return_register = None;
    current_state
}
pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32) {
// To start the next subblock of a block (for `if..then..else..end`).
// The only difference from ending the block is that choices we made in the
// first subblock (for now only the return register) must be maintained in
// the next subblocks, so every arm leaves its result in the same place.
pub fn reset_block(ctx: &mut Context, parent_block_state: BlockState) {
    // Keep the return register chosen by the previous subblock...
    let return_reg = ctx.block_state.return_register;
    // ...restore everything else to the state at block entry...
    ctx.block_state = parent_block_state;
    // ...and carry the chosen register forward into the next subblock.
    ctx.block_state.return_register = return_reg;
}
pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
// TODO: This is currently never called, but is important for if we want to
// have a more complex stack spilling scheme.
if ctx.block_state.depth != parent_block_state.depth {
@@ -445,10 +471,12 @@ pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32)
);
}
let return_reg = ctx.block_state.return_register;
ctx.block_state = parent_block_state;
if arity > 0 {
push_return_value(ctx);
if let Some(reg) = return_reg {
ctx.block_state.regs.mark_used(reg);
ctx.block_state.stack.push(StackValue::Temp(reg));
}
}
@@ -457,7 +485,11 @@ pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32)
// use that one. This will mean that `(block ...)` is no less efficient than `...`
// alone, and you only pay for the shuffling of registers in the case that you use
// `BrIf` or similar.
pub fn push_return_value(ctx: &mut Context) {
/// Make a call's return value visible on the wasm value stack. The SystemV
/// calling convention returns the (single) value in RAX, so mark RAX as
/// occupied and push it as a temporary.
fn push_return_value(ctx: &mut Context, arity: u32) {
    // Void call: nothing was returned, nothing to push.
    if arity == 0 {
        return;
    }
    // Multiple returns are not supported yet (asserted by the call sites).
    assert_eq!(arity, 1);
    // RAX now holds live data; stop it being handed out as scratch.
    ctx.block_state.regs.mark_used(RAX);
    ctx.block_state.stack.push(StackValue::Temp(RAX));
}
@@ -662,10 +694,9 @@ macro_rules! commutative_binop {
; $instr Rd(op1), [rsp + offset]
);
}
ValueLocation::Immediate(offset) => {
let offset = adjusted_offset(ctx, offset);
ValueLocation::Immediate(i) => {
dynasm!(ctx.asm
; $instr Rd(op1), [rsp + offset]
; $instr Rd(op1), i
);
}
}
@@ -677,11 +708,50 @@ macro_rules! commutative_binop {
}
commutative_binop!(i32_add, add, i32::wrapping_add);
commutative_binop!(i32_and, and, |a, b| a & b);
commutative_binop!(i32_or, or, |a, b| a | b);
commutative_binop!(i32_xor, xor, |a, b| a ^ b);
commutative_binop!(i32_mul, imul, i32::wrapping_mul);
// `i32.mul` gets a bespoke implementation instead of `commutative_binop!`
// because x86 `imul` with an immediate source needs the three-operand form
// (`imul dst, src, imm`), unlike `add`/`and`/`or`/`xor`.
pub fn i32_mul(ctx: &mut Context) {
    let op0 = pop_i32(ctx);
    let op1 = pop_i32(ctx);

    // On-the-fly const folding: both operands known at compile time, so
    // push the folded constant and emit no code at all.
    if let Some(i1) = op1.immediate() {
        if let Some(i0) = op0.immediate() {
            ctx.block_state
                .stack
                .push(StackValue::Immediate(i32::wrapping_mul(i1, i0)));
            return;
        }
    }

    // Multiplication is commutative, so we may swap the operands to ensure
    // the destination (`op1`) is a temp register we are free to clobber.
    let (op1, op0) = match op1 {
        Value::Temp(reg) => (reg, op0),
        _ => (into_temp_reg(ctx, op0), op1),
    };
    // Emit `op1 *= op0`, addressing `op0` wherever it currently lives.
    match op0.location(&ctx.block_state.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; imul Rd(op1), Rd(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            // Stack slot offsets are relative; adjust to the current rsp.
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; imul Rd(op1), [rsp + offset]
            );
        }
        ValueLocation::Immediate(i) => {
            // Three-operand immediate form: `op1 = op1 * i`.
            dynasm!(ctx.asm
                ; imul Rd(op1), Rd(op1), i
            );
        }
    }
    ctx.block_state.stack.push(StackValue::Temp(op1));
    // Release `op0`'s register/slot now that the product lives in `op1`.
    free_value(ctx, op0);
}
// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
// temp register as the output)
@@ -927,19 +997,14 @@ fn free_register(ctx: &mut Context, reg: GPR) {
return;
}
// TODO: With real stack allocation we can make this constant-time
for stack_val in ctx.block_state.stack.iter_mut().rev() {
match stack_val.location(&ctx.block_state.locals) {
// For now it's impossible for a local to be in RAX but that might be
// possible in the future, so we check both cases.
Some(ValueLocation::Reg(r)) if r == reg => {
*stack_val = if ctx.block_state.regs.free_scratch() > 1 {
let gpr = ctx.block_state.regs.take_scratch_gpr();
assert!(gpr != RAX, "RAX in stack but marked as free");
StackValue::Temp(gpr)
} else {
ctx.block_state.depth.reserve(1);
StackValue::Pop
};
ctx.block_state.depth.reserve(1);
*stack_val = StackValue::Pop;
out = Some(*stack_val);
@@ -998,9 +1063,11 @@ fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
/// Write the arguments to the callee to the registers and the stack using the SystemV
/// calling convention.
fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
fn pass_outgoing_args(ctx: &mut Context, arity: u32, return_arity: u32) -> CallCleanup {
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
free_arg_registers(ctx, arity);
// We pop stack arguments first - arguments are RTL
if num_stack_args > 0 {
let size = num_stack_args * WORD_SIZE as i32;
@@ -1032,6 +1099,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
pop_i32_into(ctx, ValueLocation::Reg(*reg));
}
// We do this before doing `save_volatile`, since otherwise we'll trample the return value
// of the call when we pop back.
free_return_register(ctx, return_arity);
CallCleanup {
stack_depth: num_stack_args,
restore_registers: save_volatile(ctx),
@@ -1063,12 +1134,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity:
"We don't support multiple return yet"
);
free_arg_registers(ctx, arg_arity);
if return_arity > 0 {
free_return_register(ctx, return_arity);
}
let cleanup = pass_outgoing_args(ctx, arg_arity);
let cleanup = pass_outgoing_args(ctx, arg_arity, return_arity);
let label = &ctx.func_starts[index as usize].1;
dynasm!(ctx.asm
@@ -1076,10 +1142,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity:
);
post_call_cleanup(ctx, cleanup);
if return_arity > 0 {
push_return_value(ctx);
}
push_return_value(ctx, return_arity);
}
#[must_use]
@@ -1106,6 +1169,7 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) -> Functio
reg_args.iter().cloned().map(ValueLocation::Reg).collect();
ctx.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
ctx.block_state.locals.num_local_stack_slots = locals;
ctx.block_state.return_register = Some(RAX);
ctx.block_state.parent_locals = ctx.block_state.locals.clone();
// ctx.block_state.depth.reserve(aligned_stack_slots - locals);

View File

@@ -124,7 +124,7 @@ pub fn translate(
// Upon entering the function implicit frame for function body is pushed. It has the same
// result type as the function itself. Branching to it is equivalent to returning from the function.
let epilogue_label = create_label(ctx);
let function_block_state = start_block(ctx, arity(return_ty));
let function_block_state = start_block(ctx);
control_frames.push(ControlFrame::new(
ControlFrameKind::Block {
end_label: epilogue_label,
@@ -157,7 +157,7 @@ pub fn translate(
}
Operator::Block { ty } => {
let label = create_label(ctx);
let state = start_block(ctx, arity(ty));
let state = start_block(ctx);
control_frames.push(ControlFrame::new(
ControlFrameKind::Block { end_label: label },
state,
@@ -195,7 +195,7 @@ pub fn translate(
let if_not = create_label(ctx);
jump_if_equal_zero(ctx, if_not);
let state = start_block(ctx, arity(ty));
let state = start_block(ctx);
control_frames.push(ControlFrame::new(
ControlFrameKind::IfTrue { end_label, if_not },
@@ -206,7 +206,7 @@ pub fn translate(
Operator::Loop { ty } => {
let header = create_label(ctx);
let state = start_block(ctx, arity(ty));
let state = start_block(ctx);
define_label(ctx, header);
control_frames.push(ControlFrame::new(
@@ -224,7 +224,7 @@ pub fn translate(
..
}) => {
return_from_block(ctx, arity(ty), false);
end_block(ctx, block_state.clone(), arity(ty));
reset_block(ctx, block_state.clone());
// Finalize `then` block by jumping to the `end_label`.
br(ctx, end_label);
@@ -250,6 +250,7 @@ pub fn translate(
};
}
Operator::End => {
// TODO: Merge `End`s
let control_frame = control_frames.pop().expect("control stack is never empty");
let arity = control_frame.arity();
@@ -259,7 +260,7 @@ pub fn translate(
return_from_block(ctx, arity, control_frames.is_empty());
}
end_block(ctx, control_frame.block_state, arity);
end_block(ctx, control_frame.block_state);
if let Some(block_end) = control_frame.kind.block_end() {
define_label(ctx, block_end);

View File

@@ -471,7 +471,7 @@ fn fib() {
for x in 0..30 {
unsafe {
assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x));
assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x), "Failed for x={}", x);
}
}
}