Avoid unnecessary work when passing arguments on
This commit is contained in:
152
src/backend.rs
152
src/backend.rs
@@ -144,7 +144,7 @@ const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
|
|||||||
const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11];
|
const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11];
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub struct Function {
|
pub struct FunctionEnd {
|
||||||
should_generate_epilogue: bool,
|
should_generate_epilogue: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -299,6 +299,79 @@ impl StackValue {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||||
|
enum ArgLoc {
|
||||||
|
Register(GPR),
|
||||||
|
Stack(i32),
|
||||||
|
Both(GPR, i32),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ArgLoc {
|
||||||
|
fn from_loc(loc: ValueLocation) -> Self {
|
||||||
|
match loc {
|
||||||
|
ValueLocation::Stack(offset) => ArgLoc::Stack(offset),
|
||||||
|
ValueLocation::Reg(gpr) => ArgLoc::Register(gpr),
|
||||||
|
_ => panic!("Unsupported local location"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn union(&mut self, other: ArgLoc) {
|
||||||
|
for l in other.locs() {
|
||||||
|
self.add_loc(l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn locs(&self) -> impl Iterator<Item = ValueLocation> {
|
||||||
|
self.reg()
|
||||||
|
.map(ValueLocation::Reg)
|
||||||
|
.into_iter()
|
||||||
|
.chain(self.stack().map(ValueLocation::Stack))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reg(&self) -> Option<GPR> {
|
||||||
|
match *self {
|
||||||
|
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => Some(gpr),
|
||||||
|
ArgLoc::Stack(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn stack(&self) -> Option<i32> {
|
||||||
|
match *self {
|
||||||
|
ArgLoc::Stack(o) | ArgLoc::Both(_, o) => Some(o),
|
||||||
|
ArgLoc::Register(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_loc(&mut self, loc: ValueLocation) {
|
||||||
|
match loc {
|
||||||
|
ValueLocation::Stack(offset) => self.add_stack(offset),
|
||||||
|
ValueLocation::Reg(gpr) => self.add_reg(gpr),
|
||||||
|
_ => panic!("Unsupported local location"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_stack(&mut self, offset: i32) {
|
||||||
|
*self = match *self {
|
||||||
|
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => ArgLoc::Both(gpr, offset),
|
||||||
|
ArgLoc::Stack(_) => ArgLoc::Stack(offset),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_reg(&mut self, gpr: GPR) {
|
||||||
|
*self = match *self {
|
||||||
|
ArgLoc::Stack(offset) | ArgLoc::Both(_, offset) => ArgLoc::Both(gpr, offset),
|
||||||
|
ArgLoc::Register(_) => ArgLoc::Register(gpr),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn best_loc(&self) -> ValueLocation {
|
||||||
|
match *self {
|
||||||
|
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => ValueLocation::Reg(gpr),
|
||||||
|
ArgLoc::Stack(offset) => ValueLocation::Stack(offset),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A store for the local values for our function, including arguments.
|
/// A store for the local values for our function, including arguments.
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Default, Clone)]
|
||||||
pub struct Locals {
|
pub struct Locals {
|
||||||
@@ -309,7 +382,7 @@ pub struct Locals {
|
|||||||
/// registers that this can contain. If we need to move the argument
|
/// registers that this can contain. If we need to move the argument
|
||||||
/// out of a register (for example, because we're calling a function)
|
/// out of a register (for example, because we're calling a function)
|
||||||
/// we note that down here, so we don't have to move it back afterwards.
|
/// we note that down here, so we don't have to move it back afterwards.
|
||||||
register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
|
register_arguments: ArrayVec<[ArgLoc; ARGS_IN_GPRS.len()]>,
|
||||||
/// The number of arguments stored on the stack.
|
/// The number of arguments stored on the stack.
|
||||||
num_stack_args: u32,
|
num_stack_args: u32,
|
||||||
/// The number of local stack slots, i.e. the amount of stack space reserved for locals.
|
/// The number of local stack slots, i.e. the amount of stack space reserved for locals.
|
||||||
@@ -325,14 +398,18 @@ impl Locals {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_pos(&mut self, index: u32, loc: ValueLocation) {
|
||||||
|
self.register_arguments[index as usize].add_loc(loc);
|
||||||
|
}
|
||||||
|
|
||||||
fn set_pos(&mut self, index: u32, loc: ValueLocation) {
|
fn set_pos(&mut self, index: u32, loc: ValueLocation) {
|
||||||
self.register_arguments[index as usize] = loc;
|
self.register_arguments[index as usize] = ArgLoc::from_loc(loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get(&self, index: u32) -> ValueLocation {
|
fn get(&self, index: u32) -> ValueLocation {
|
||||||
self.register_arguments
|
self.register_arguments
|
||||||
.get(index as usize)
|
.get(index as usize)
|
||||||
.cloned()
|
.map(ArgLoc::best_loc)
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
let stack_index = index - self.register_arguments.len() as u32;
|
let stack_index = index - self.register_arguments.len() as u32;
|
||||||
if stack_index < self.num_stack_args {
|
if stack_index < self.num_stack_args {
|
||||||
@@ -890,6 +967,14 @@ impl Context<'_> {
|
|||||||
Label(self.asm.new_dynamic_label())
|
Label(self.asm.new_dynamic_label())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn define_host_fn(&mut self, host_fn: *const u8) {
|
||||||
|
dynasm!(self.asm
|
||||||
|
; mov rax, QWORD host_fn as i64
|
||||||
|
; call rax
|
||||||
|
; ret
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn adjusted_offset(&self, offset: i32) -> i32 {
|
fn adjusted_offset(&self, offset: i32) -> i32 {
|
||||||
(self.block_state.depth.0 * WORD_SIZE) as i32 + offset
|
(self.block_state.depth.0 * WORD_SIZE) as i32 + offset
|
||||||
}
|
}
|
||||||
@@ -1184,7 +1269,7 @@ impl Context<'_> {
|
|||||||
.iter()
|
.iter()
|
||||||
.zip(&locals.register_arguments)
|
.zip(&locals.register_arguments)
|
||||||
{
|
{
|
||||||
self.copy_value(*src, *dst);
|
self.copy_value(src.best_loc(), dst.best_loc());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (src, dst) in self
|
for (src, dst) in self
|
||||||
@@ -1194,7 +1279,7 @@ impl Context<'_> {
|
|||||||
.iter_mut()
|
.iter_mut()
|
||||||
.zip(&locals.register_arguments)
|
.zip(&locals.register_arguments)
|
||||||
{
|
{
|
||||||
*src = *dst;
|
src.union(*dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1615,7 +1700,7 @@ impl Context<'_> {
|
|||||||
.register_arguments
|
.register_arguments
|
||||||
.get_mut(local_idx as usize)
|
.get_mut(local_idx as usize)
|
||||||
{
|
{
|
||||||
*cur = dst_loc;
|
*cur = ArgLoc::from_loc(dst_loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.copy_value(val_loc, dst_loc);
|
self.copy_value(val_loc, dst_loc);
|
||||||
@@ -1636,7 +1721,7 @@ impl Context<'_> {
|
|||||||
.register_arguments
|
.register_arguments
|
||||||
.get_mut(local_idx as usize)
|
.get_mut(local_idx as usize)
|
||||||
{
|
{
|
||||||
*cur = dst_loc;
|
*cur = ArgLoc::from_loc(dst_loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.copy_value(val_loc, dst_loc);
|
self.copy_value(val_loc, dst_loc);
|
||||||
@@ -1715,23 +1800,25 @@ impl Context<'_> {
|
|||||||
/// Unfortunately, we can't elide this store if we're just passing arguments on
|
/// Unfortunately, we can't elide this store if we're just passing arguments on
|
||||||
/// because these registers are caller-saved and so the callee can use them as
|
/// because these registers are caller-saved and so the callee can use them as
|
||||||
/// scratch space.
|
/// scratch space.
|
||||||
fn free_arg_registers(&mut self, count: u32) {
|
fn free_arg_registers(&mut self, arity: u32) {
|
||||||
if count == 0 {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is bound to the maximum size of the `ArrayVec` amd so can be considered to have constant
|
// This is bound to the maximum size of the `ArrayVec` amd so can be considered to have constant
|
||||||
// runtime
|
// runtime
|
||||||
for i in 0..self.block_state.locals.register_arguments.len() {
|
for i in 0..self
|
||||||
|
.block_state
|
||||||
|
.locals
|
||||||
|
.register_arguments
|
||||||
|
.len()
|
||||||
|
.min(arity as usize)
|
||||||
|
{
|
||||||
match self.block_state.locals.register_arguments[i] {
|
match self.block_state.locals.register_arguments[i] {
|
||||||
ValueLocation::Reg(reg) => {
|
ArgLoc::Register(reg) => {
|
||||||
if ARGS_IN_GPRS.contains(®) {
|
if ARGS_IN_GPRS.contains(®) {
|
||||||
let dst = ValueLocation::Stack(
|
let offset =
|
||||||
((self.block_state.locals.num_local_stack_slots - 1 - i as u32)
|
((self.block_state.locals.num_local_stack_slots - 1 - i as u32)
|
||||||
* WORD_SIZE) as _,
|
* WORD_SIZE) as _;
|
||||||
);
|
let dst = ValueLocation::Stack(offset);
|
||||||
self.copy_value(ValueLocation::Reg(reg), dst);
|
self.copy_value(ValueLocation::Reg(reg), dst);
|
||||||
self.block_state.locals.register_arguments[i] = dst;
|
self.block_state.locals.register_arguments[i].add_stack(offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
@@ -1821,10 +1908,15 @@ impl Context<'_> {
|
|||||||
|
|
||||||
/// Write the arguments to the callee to the registers and the stack using the SystemV
|
/// Write the arguments to the callee to the registers and the stack using the SystemV
|
||||||
/// calling convention.
|
/// calling convention.
|
||||||
fn pass_outgoing_args(&mut self, arity: u32, return_arity: u32) -> CallCleanup {
|
fn pass_outgoing_args(
|
||||||
|
&mut self,
|
||||||
|
arity: u32,
|
||||||
|
return_arity: u32,
|
||||||
|
has_vmctx: bool,
|
||||||
|
) -> CallCleanup {
|
||||||
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
|
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
|
||||||
|
|
||||||
self.free_arg_registers(arity);
|
self.free_arg_registers(if has_vmctx { arity - 1} else { arity } );
|
||||||
|
|
||||||
// We pop stack arguments first - arguments are RTL
|
// We pop stack arguments first - arguments are RTL
|
||||||
if num_stack_args > 0 {
|
if num_stack_args > 0 {
|
||||||
@@ -1879,6 +1971,14 @@ impl Context<'_> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If these values were in register they've now been invalidated, since
|
||||||
|
// the callee can use them as scratch.
|
||||||
|
for loc in self.block_state.locals.register_arguments.iter_mut() {
|
||||||
|
if let Some(offset) = loc.stack() {
|
||||||
|
*loc = ArgLoc::Stack(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for reg in cleanup.restore_registers.drain(..).rev() {
|
for reg in cleanup.restore_registers.drain(..).rev() {
|
||||||
dynasm!(self.asm
|
dynasm!(self.asm
|
||||||
; pop Rq(reg)
|
; pop Rq(reg)
|
||||||
@@ -1904,7 +2004,7 @@ impl Context<'_> {
|
|||||||
|
|
||||||
let vmctx = Value::Local(self.block_state.locals.vmctx_index());
|
let vmctx = Value::Local(self.block_state.locals.vmctx_index());
|
||||||
self.push(vmctx);
|
self.push(vmctx);
|
||||||
let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity);
|
let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true);
|
||||||
|
|
||||||
let label = &self.func_starts[index as usize].1;
|
let label = &self.func_starts[index as usize].1;
|
||||||
dynasm!(self.asm
|
dynasm!(self.asm
|
||||||
@@ -1919,7 +2019,7 @@ impl Context<'_> {
|
|||||||
// as scratch registers
|
// as scratch registers
|
||||||
// TODO: Allow use of unused argument registers as scratch registers.
|
// TODO: Allow use of unused argument registers as scratch registers.
|
||||||
/// Writes the function prologue and stores the arguments as locals
|
/// Writes the function prologue and stores the arguments as locals
|
||||||
pub fn start_function(&mut self, arguments: u32, locals: u32) -> Function {
|
pub fn start_function(&mut self, arguments: u32, locals: u32) -> FunctionEnd {
|
||||||
let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
|
let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
|
||||||
|
|
||||||
// We need space to store the register arguments if we need to call a function
|
// We need space to store the register arguments if we need to call a function
|
||||||
@@ -1931,7 +2031,7 @@ impl Context<'_> {
|
|||||||
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
|
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
|
||||||
|
|
||||||
self.block_state.locals.register_arguments =
|
self.block_state.locals.register_arguments =
|
||||||
reg_args.iter().cloned().map(ValueLocation::Reg).collect();
|
reg_args.iter().cloned().map(ArgLoc::Register).collect();
|
||||||
self.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
|
self.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
|
||||||
self.block_state.locals.num_local_stack_slots = stack_slots;
|
self.block_state.locals.num_local_stack_slots = stack_slots;
|
||||||
self.block_state.return_register = Some(RAX);
|
self.block_state.return_register = Some(RAX);
|
||||||
@@ -1946,14 +2046,14 @@ impl Context<'_> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Function {
|
FunctionEnd {
|
||||||
should_generate_epilogue,
|
should_generate_epilogue,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Writes the function epilogue, restoring the stack pointer and returning to the
|
/// Writes the function epilogue, restoring the stack pointer and returning to the
|
||||||
/// caller.
|
/// caller.
|
||||||
pub fn epilogue(&mut self, func: Function) {
|
pub fn epilogue(&mut self, func: FunctionEnd) {
|
||||||
// We don't need to clean up the stack - RSP is restored and
|
// We don't need to clean up the stack - RSP is restored and
|
||||||
// the calling function has its own register stack and will
|
// the calling function has its own register stack and will
|
||||||
// stomp on the registers from our stack if necessary.
|
// stomp on the registers from our stack if necessary.
|
||||||
|
|||||||
Reference in New Issue
Block a user