Allow blocks to return values in any register

Author: Jef
Date:   2018-12-18 17:16:38 +01:00
Parent: 74ffb8560c
Commit: 5418241dc6
3 changed files with 104 additions and 39 deletions
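In outline: previously every block funneled its result through RAX; after this commit, the first exit from a block records whichever register the value already occupies, and later exits target that same register. A minimal sketch of the policy with simplified stand-in types (`Gpr`, `StackVal`, and `BlockState` below are illustrative, not the backend's real definitions):

// Hedged sketch of the new register-selection policy, not the backend code.
#[derive(Debug, Clone, Copy, PartialEq)]
struct Gpr(u8);

#[derive(Debug, Clone, Copy)]
enum StackVal {
    Temp(Gpr), // value already lives in a scratch register
    Other,     // local, immediate, or stack slot
}

#[derive(Default)]
struct BlockState {
    // `None` until the first branch out of the block decides
    // which register the block's result will travel in.
    return_register: Option<Gpr>,
}

impl BlockState {
    // First exit from the block: if the value is already in a temp
    // register, adopt that register instead of forcing a move to RAX.
    // Every later exit must then place its value in the same register.
    fn choose_return_register(&mut self, top: StackVal, scratch: Gpr) -> Gpr {
        *self.return_register.get_or_insert(match top {
            StackVal::Temp(r) => r, // zero-cost: reuse where the value sits
            StackVal::Other => scratch,
        })
    }
}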

File 1 of 3

@@ -211,7 +211,6 @@ impl TranslatedCodeSection {
     }
 }

-// TODO: Immediates? We could implement on-the-fly const folding
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum Value {
     Local(u32),
@@ -287,6 +286,7 @@ pub struct BlockState {
     // TODO: `BitVec`
     stack_map: Vec<bool>,
     depth: StackDepth,
+    return_register: Option<GPR>,
     regs: Registers,
     /// This is the _current_ locals, since we can shuffle them about during function calls.
     /// We will restore this to be the same state as the `Locals` in `Context` at the end
@@ -426,17 +426,43 @@ pub fn return_from_block(ctx: &mut Context, arity: u32, is_function_end: bool) {
     }

     let stack_top = *ctx.block_state.stack.last().expect("Stack is empty");
-    put_stack_val_into(ctx, stack_top, ValueLocation::Reg(RAX))
+    if let Some(reg) = ctx.block_state.return_register {
+        put_stack_val_into(ctx, stack_top, ValueLocation::Reg(reg));
+    } else {
+        let out_reg = match stack_top {
+            StackValue::Temp(r) => r,
+            other => {
+                let new_scratch = ctx.block_state.regs.take_scratch_gpr();
+                put_stack_val_into(ctx, other, ValueLocation::Reg(new_scratch));
+                new_scratch
+            }
+        };
+        ctx.block_state.return_register = Some(out_reg);
+    }
 }

-pub fn start_block(ctx: &mut Context, arity: u32) -> BlockState {
-    free_return_register(ctx, arity);
+pub fn start_block(ctx: &mut Context) -> BlockState {
+    // free_return_register(ctx, arity);
     let current_state = ctx.block_state.clone();
     ctx.block_state.parent_locals = ctx.block_state.locals.clone();
+    ctx.block_state.return_register = None;
     current_state
 }

-pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32) {
+// To start the next subblock of a block (for `if..then..else..end`).
+// The only difference is that choices we made in the first subblock
+// (for now only the return register) must be maintained in the next
+// subblocks.
+pub fn reset_block(ctx: &mut Context, parent_block_state: BlockState) {
+    let return_reg = ctx.block_state.return_register;
+    ctx.block_state = parent_block_state;
+    ctx.block_state.return_register = return_reg;
+}
+
+pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
     // TODO: This is currently never called, but is important for if we want to
     // have a more complex stack spilling scheme.
     if ctx.block_state.depth != parent_block_state.depth {
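For orientation, the intended pairing of these three entry points when translating a wasm `if..else..end` looks roughly like this. This is a hedged sketch that leans on the backend's own `Context`/`BlockState` types rather than being a standalone program; the real call sites are in the `translate` loop in the second file below.

// Sketch only: how start_block / reset_block / end_block are meant to nest.
fn translate_if_else_sketch(ctx: &mut Context) {
    let outer_state = start_block(ctx); // begin the `then` arm; no register chosen yet

    // ...emit the `then` arm; `return_from_block` runs at its exit and
    // records the register the result landed in as `return_register`...

    // The `else` arm must produce its result in the same register the
    // `then` arm chose, so reset to the parent state while carrying the
    // chosen `return_register` over.
    reset_block(ctx, outer_state.clone());

    // ...emit the `else` arm; its `return_from_block` now sees
    // `Some(reg)` and moves the value into the agreed register...

    // `end` restores the parent state and pushes the merged result as a
    // temp in that register.
    end_block(ctx, outer_state);
}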
@@ -445,10 +471,12 @@ pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32)
         );
     }

+    let return_reg = ctx.block_state.return_register;
     ctx.block_state = parent_block_state;

-    if arity > 0 {
-        push_return_value(ctx);
+    if let Some(reg) = return_reg {
+        ctx.block_state.regs.mark_used(reg);
+        ctx.block_state.stack.push(StackValue::Temp(reg));
     }
 }
@@ -457,7 +485,11 @@ pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32)
 // use that one. This will mean that `(block ...)` is no less efficient than `...`
 // alone, and you only pay for the shuffling of registers in the case that you use
 // `BrIf` or similar.
-pub fn push_return_value(ctx: &mut Context) {
+fn push_return_value(ctx: &mut Context, arity: u32) {
+    if arity == 0 {
+        return;
+    }
+    assert_eq!(arity, 1);
     ctx.block_state.regs.mark_used(RAX);
     ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
@@ -662,10 +694,9 @@ macro_rules! commutative_binop {
                     ; $instr Rd(op1), [rsp + offset]
                 );
             }
-            ValueLocation::Immediate(offset) => {
-                let offset = adjusted_offset(ctx, offset);
+            ValueLocation::Immediate(i) => {
                 dynasm!(ctx.asm
-                    ; $instr Rd(op1), [rsp + offset]
+                    ; $instr Rd(op1), i
                 );
             }
         }
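The old `Immediate` arm reused the `[rsp + offset]` addressing from the `Stack` arm above it, misreading the constant as a stack offset; the fix encodes the immediate directly. A plain-Rust model of the three operand forms the macro now distinguishes (illustrative mnemonics, with a hypothetical `emit_add`, not the dynasm output):

// Hypothetical model of the operand forms for a commutative binop such as
// `add`; the real code emits machine code via dynasm rather than strings.
enum Operand {
    Reg(&'static str),
    Stack(i32), // rsp-relative byte offset
    Immediate(i32),
}

fn emit_add(dst: &'static str, src: Operand) -> String {
    match src {
        Operand::Reg(r) => format!("add {dst}, {r}"),
        Operand::Stack(off) => format!("add {dst}, [rsp + {off}]"),
        // The fixed arm: encode the constant itself instead of
        // treating it as a stack offset.
        Operand::Immediate(i) => format!("add {dst}, {i}"),
    }
}

fn main() {
    assert_eq!(emit_add("eax", Operand::Immediate(5)), "add eax, 5");
}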
@@ -677,11 +708,50 @@ macro_rules! commutative_binop {
 }

 commutative_binop!(i32_add, add, i32::wrapping_add);
 commutative_binop!(i32_and, and, |a, b| a & b);
 commutative_binop!(i32_or, or, |a, b| a | b);
 commutative_binop!(i32_xor, xor, |a, b| a ^ b);
-commutative_binop!(i32_mul, imul, i32::wrapping_mul);
+
+pub fn i32_mul(ctx: &mut Context) {
+    let op0 = pop_i32(ctx);
+    let op1 = pop_i32(ctx);
+
+    if let Some(i1) = op1.immediate() {
+        if let Some(i0) = op0.immediate() {
+            ctx.block_state
+                .stack
+                .push(StackValue::Immediate(i32::wrapping_mul(i1, i0)));
+            return;
+        }
+    }
+
+    let (op1, op0) = match op1 {
+        Value::Temp(reg) => (reg, op0),
+        _ => (into_temp_reg(ctx, op0), op1),
+    };
+    match op0.location(&ctx.block_state.locals) {
+        ValueLocation::Reg(reg) => {
+            dynasm!(ctx.asm
+                ; imul Rd(op1), Rd(reg)
+            );
+        }
+        ValueLocation::Stack(offset) => {
+            let offset = adjusted_offset(ctx, offset);
+            dynasm!(ctx.asm
+                ; imul Rd(op1), [rsp + offset]
+            );
+        }
+        ValueLocation::Immediate(i) => {
+            dynasm!(ctx.asm
+                ; imul Rd(op1), Rd(op1), i
+            );
+        }
+    }
+
+    ctx.block_state.stack.push(StackValue::Temp(op1));
+    free_value(ctx, op0);
+}

 // `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
 // temp register as the output)
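The first branch of the new `i32_mul` delivers the const folding that the deleted TODO at the top of the file asked for: two immediates never reach the assembler. A standalone check of the fold's semantics (wasm's `i32.mul` is modular arithmetic, which is exactly `i32::wrapping_mul`):

// Wasm `i32.mul` wraps on overflow, so compile-time folding must use
// wrapping multiplication rather than `*`, which panics in debug builds.
fn main() {
    assert_eq!(i32::wrapping_mul(2, 3), 6);
    // 0x4000_0000 * 2 == 0x8000_0000, which wraps to i32::MIN.
    assert_eq!(i32::wrapping_mul(0x4000_0000, 2), i32::MIN);
}

The `Immediate` arm, meanwhile, relies on x86's three-operand `imul r32, r/m32, imm32` encoding, which is why the destination appears twice (`Rd(op1), Rd(op1), i`) while `add` takes a plain two-operand immediate form.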
@@ -927,19 +997,14 @@ fn free_register(ctx: &mut Context, reg: GPR) {
         return;
     }

+    // TODO: With real stack allocation we can make this constant-time
     for stack_val in ctx.block_state.stack.iter_mut().rev() {
         match stack_val.location(&ctx.block_state.locals) {
             // For now it's impossible for a local to be in RAX but that might be
             // possible in the future, so we check both cases.
             Some(ValueLocation::Reg(r)) if r == reg => {
-                *stack_val = if ctx.block_state.regs.free_scratch() > 1 {
-                    let gpr = ctx.block_state.regs.take_scratch_gpr();
-                    assert!(gpr != RAX, "RAX in stack but marked as free");
-                    StackValue::Temp(gpr)
-                } else {
-                    ctx.block_state.depth.reserve(1);
-                    StackValue::Pop
-                };
+                ctx.block_state.depth.reserve(1);
+                *stack_val = StackValue::Pop;

                 out = Some(*stack_val);
@@ -998,9 +1063,11 @@ fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
 /// Write the arguments to the callee to the registers and the stack using the SystemV
 /// calling convention.
-fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
+fn pass_outgoing_args(ctx: &mut Context, arity: u32, return_arity: u32) -> CallCleanup {
     let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;

+    free_arg_registers(ctx, arity);
+
     // We pop stack arguments first - arguments are RTL
     if num_stack_args > 0 {
         let size = num_stack_args * WORD_SIZE as i32;
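The doc comment above refers to the SysV calling convention; `ARGS_IN_GPRS` is presumably the six SysV AMD64 integer argument registers (an assumption — the array's definition is outside this diff):

// SysV AMD64 passes the first six integer/pointer arguments in these
// registers, in this order; further arguments go on the stack, and the
// integer return value comes back in RAX. This constant is an
// illustrative guess at what `ARGS_IN_GPRS` contains, not the diffed code.
const ARGS_IN_GPRS_SKETCH: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];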
@@ -1032,6 +1099,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
         pop_i32_into(ctx, ValueLocation::Reg(*reg));
     }

+    // We do this before doing `save_volatile`, since otherwise we'll trample the return value
+    // of the call when we pop back.
+    free_return_register(ctx, return_arity);
+
     CallCleanup {
         stack_depth: num_stack_args,
         restore_registers: save_volatile(ctx),
@@ -1063,12 +1134,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity:
         "We don't support multiple return yet"
     );

-    free_arg_registers(ctx, arg_arity);
-
-    if return_arity > 0 {
-        free_return_register(ctx, return_arity);
-    }
-
-    let cleanup = pass_outgoing_args(ctx, arg_arity);
+    let cleanup = pass_outgoing_args(ctx, arg_arity, return_arity);

     let label = &ctx.func_starts[index as usize].1;
     dynasm!(ctx.asm
@@ -1076,10 +1142,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity:
     );

     post_call_cleanup(ctx, cleanup);
-
-    if return_arity > 0 {
-        push_return_value(ctx);
-    }
+    push_return_value(ctx, return_arity);
 }

 #[must_use]
@@ -1106,6 +1169,7 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) -> Functio
         reg_args.iter().cloned().map(ValueLocation::Reg).collect();
     ctx.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
     ctx.block_state.locals.num_local_stack_slots = locals;
+    ctx.block_state.return_register = Some(RAX);
     ctx.block_state.parent_locals = ctx.block_state.locals.clone();

     // ctx.block_state.depth.reserve(aligned_stack_slots - locals);
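Note that `start_function` seeds the top-level block with `return_register = Some(RAX)`: the function's own result has to end up where the SysV convention expects it, so the free choice of register applies only to interior blocks.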

File 2 of 3

@@ -124,7 +124,7 @@ pub fn translate(
     // Upon entering the function implicit frame for function body is pushed. It has the same
     // result type as the function itself. Branching to it is equivalent to returning from the function.
     let epilogue_label = create_label(ctx);
-    let function_block_state = start_block(ctx, arity(return_ty));
+    let function_block_state = start_block(ctx);
     control_frames.push(ControlFrame::new(
         ControlFrameKind::Block {
             end_label: epilogue_label,
@@ -157,7 +157,7 @@ pub fn translate(
             }
             Operator::Block { ty } => {
                 let label = create_label(ctx);
-                let state = start_block(ctx, arity(ty));
+                let state = start_block(ctx);
                 control_frames.push(ControlFrame::new(
                     ControlFrameKind::Block { end_label: label },
                     state,
@@ -195,7 +195,7 @@ pub fn translate(
                 let if_not = create_label(ctx);
                 jump_if_equal_zero(ctx, if_not);

-                let state = start_block(ctx, arity(ty));
+                let state = start_block(ctx);

                 control_frames.push(ControlFrame::new(
                     ControlFrameKind::IfTrue { end_label, if_not },
@@ -206,7 +206,7 @@ pub fn translate(
             Operator::Loop { ty } => {
                 let header = create_label(ctx);

-                let state = start_block(ctx, arity(ty));
+                let state = start_block(ctx);
                 define_label(ctx, header);

                 control_frames.push(ControlFrame::new(
@@ -224,7 +224,7 @@ pub fn translate(
                 ..
             }) => {
                 return_from_block(ctx, arity(ty), false);
-                end_block(ctx, block_state.clone(), arity(ty));
+                reset_block(ctx, block_state.clone());

                 // Finalize `then` block by jumping to the `end_label`.
                 br(ctx, end_label);
@@ -250,6 +250,7 @@ pub fn translate(
                 };
             }
             Operator::End => {
+                // TODO: Merge `End`s
                 let control_frame = control_frames.pop().expect("control stack is never empty");
                 let arity = control_frame.arity();
@@ -259,7 +260,7 @@ pub fn translate(
                     return_from_block(ctx, arity, control_frames.is_empty());
                 }

-                end_block(ctx, control_frame.block_state, arity);
+                end_block(ctx, control_frame.block_state);

                 if let Some(block_end) = control_frame.kind.block_end() {
                     define_label(ctx, block_end);

File 3 of 3

@@ -471,7 +471,7 @@ fn fib() {
     for x in 0..30 {
         unsafe {
-            assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x));
+            assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x), "Failed for x={}", x);
         }
     }
 }