diff --git a/src/backend.rs b/src/backend.rs index e6e3d6a141..4e3d7abf68 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -256,6 +256,9 @@ impl StackValue { #[derive(Debug, Default, Clone)] struct Locals { + // TODO: Store all places that the value can be read, so we can optimise + // passing (register) arguments along into a noop after saving their + // values. register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>, num_stack_args: u32, num_local_stack_slots: u32, @@ -791,10 +794,9 @@ pub fn i32_sub(ctx: &mut Context) { ; sub Rd(op1), [rsp + offset] ); } - ValueLocation::Immediate(offset) => { - let offset = adjusted_offset(ctx, offset); + ValueLocation::Immediate(i) => { dynasm!(ctx.asm - ; sub Rd(op1), [rsp + offset] + ; sub Rd(op1), i ); } } @@ -814,6 +816,10 @@ pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { let val_loc = val.location(&ctx.block_state.locals); let dst_loc = ctx.block_state.parent_locals.get(local_idx); + // TODO: We can have a specified stack depth where we always materialize locals, + // which would preserve linear runtime. + materialize_local(ctx, local_idx); + if let Some(cur) = ctx .block_state .locals @@ -823,9 +829,6 @@ pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { *cur = dst_loc; } - // TODO: We can have a specified stack depth where we always materialize locals, - // which would preserve linear runtime. - materialize_local(ctx, local_idx); copy_value(ctx, val_loc, dst_loc); free_value(ctx, val); } @@ -890,71 +893,86 @@ pub fn literal_i32(ctx: &mut Context, imm: i32) { push_i32(ctx, Value::Immediate(imm)); } -pub fn relop_eq_i32(ctx: &mut Context) { - let right = pop_i32(ctx); - let left = pop_i32(ctx); - let result = ctx.block_state.regs.take_scratch_gpr(); +macro_rules! 
cmp { + ($name:ident, $instr:ident, $reverse_instr:ident, $const_fallback:expr) => { /* $instr: setcc for `left OP right`; $reverse_instr: setcc for the mirrored comparison, used when the operands are emitted swapped; $const_fallback: folds two immediates */ + pub fn $name(ctx: &mut Context) { + let right = pop_i32(ctx); + let left = pop_i32(ctx); - if let Some(i) = left.immediate() { - match right.location(&ctx.block_state.locals) { - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp DWORD [rsp + offset], i - ; sete Rb(result) - ); - } - ValueLocation::Reg(rreg) => { - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(rreg), i - ; sete Rb(result) - ); - } - ValueLocation::Immediate(right) => { - let is_equal = if i == right { 1i8 } else { 0 }; - dynasm!(ctx.asm - ; mov Rb(result), is_equal - ); - } - } - } else { - let lreg = into_reg(ctx, left); - match right.location(&ctx.block_state.locals) { - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(lreg), [rsp + offset] - ; sete Rb(result) - ); - } - ValueLocation::Reg(rreg) => { - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(lreg), Rd(rreg) - ; sete Rb(result) - ); - } - ValueLocation::Immediate(i) => { - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(lreg), i - ; sete Rb(result) - ); - } + let out = if let Some(i) = left.immediate() { /* the immediate cannot be the first operand of `cmp`, so `right` goes first and the mirrored setcc is emitted */ + match right.location(&ctx.block_state.locals) { + ValueLocation::Stack(offset) => { + let result = ctx.block_state.regs.take_scratch_gpr(); + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp DWORD [rsp + offset], i + ; $reverse_instr Rb(result) + ); + Value::Temp(result) + } + ValueLocation::Reg(rreg) => { + let result = ctx.block_state.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(rreg), i + ; $reverse_instr Rb(result) + ); + Value::Temp(result) + } + ValueLocation::Immediate(right) => { + Value::Immediate(if $const_fallback(i, right) { 1 } else { 0 }) + } + } + } else { + let lreg = into_reg(ctx, 
left); + let result = ctx.block_state.regs.take_scratch_gpr(); + + match right.location(&ctx.block_state.locals) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), [rsp + offset] + ; $instr Rb(result) + ); + } + ValueLocation::Reg(rreg) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), Rd(rreg) + ; $instr Rb(result) + ); + } + ValueLocation::Immediate(i) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), i + ; $instr Rb(result) + ); + } + } + + Value::Temp(result) + }; + + push_i32(ctx, out); + free_value(ctx, left); + free_value(ctx, right); } } - - push_i32(ctx, Value::Temp(result)); - free_value(ctx, left); - free_value(ctx, right); } +cmp!(i32_eq, sete, sete, |a, b| a == b); +cmp!(i32_neq, setne, setne, |a, b| a != b); +cmp!(i32_lt, setl, setg, |a, b| a < b); +cmp!(i32_le, setle, setge, |a, b| a <= b); +cmp!(i32_gt, setg, setl, |a, b| a > b); +cmp!(i32_ge, setge, setle, |a, b| a >= b); + /// Pops i32 predicate and branches to the specified label /// if the predicate is equal to zero. 
-pub fn jump_if_equal_zero(ctx: &mut Context, label: Label) { +pub fn jump_if_false(ctx: &mut Context, label: Label) { let val = pop_i32(ctx); let predicate = into_temp_reg(ctx, val); dynasm!(ctx.asm @@ -1042,7 +1060,10 @@ fn free_arg_registers(ctx: &mut Context, count: u32) { match ctx.block_state.locals.register_arguments[i] { ValueLocation::Reg(reg) => { if ARGS_IN_GPRS.contains(&reg) { - let dst = ValueLocation::Stack((i as u32 * WORD_SIZE) as _); + let dst = ValueLocation::Stack( + ((ctx.block_state.locals.num_local_stack_slots - 1 - i as u32) * WORD_SIZE) + as _, + ); copy_value(ctx, ValueLocation::Reg(reg), dst); ctx.block_state.locals.register_arguments[i] = dst; } @@ -1230,17 +1251,18 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) -> Functio // We need space to store the register arguments if we need to call a function // and overwrite these registers so we add `reg_args.len()` - let locals = locals + reg_args.len() as u32; + let stack_slots = locals + reg_args.len() as u32; // Align stack slots to the nearest even number. This is required // by x86-64 ABI. 
- let aligned_stack_slots = (locals + 1) & !1; + let aligned_stack_slots = (stack_slots + 1) & !1; let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; ctx.block_state.locals.register_arguments = reg_args.iter().cloned().map(ValueLocation::Reg).collect(); ctx.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _); - ctx.block_state.locals.num_local_stack_slots = locals; + ctx.block_state.locals.num_local_stack_slots = stack_slots; ctx.block_state.return_register = Some(RAX); + ctx.block_state.parent_locals = ctx.block_state.locals.clone(); // ctx.block_state.depth.reserve(aligned_stack_slots - locals); diff --git a/src/function_body.rs b/src/function_body.rs index dd728eb356..6001c03903 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -136,14 +136,16 @@ pub fn translate( for op in operators { let op = op?; - if let Operator::End = op { - } else { - if control_frames - .last() - .expect("Control stack never empty") - .unreachable - { - continue; + match op { + Operator::End | Operator::Else => {} + _ => { + if control_frames + .last() + .expect("Control stack never empty") + .unreachable + { + continue; + } } } @@ -183,7 +185,7 @@ pub fn translate( let if_not = create_label(ctx); - jump_if_equal_zero(ctx, if_not); + jump_if_false(ctx, if_not); return_from_block(ctx, control_frame.arity(), idx == 0); br(ctx, control_frame.kind.branch_target()); @@ -194,7 +196,7 @@ pub fn translate( let end_label = create_label(ctx); let if_not = create_label(ctx); - jump_if_equal_zero(ctx, if_not); + jump_if_false(ctx, if_not); let state = start_block(ctx); control_frames.push(ControlFrame::new( @@ -206,8 +208,8 @@ pub fn translate( Operator::Loop { ty } => { let header = create_label(ctx); - let state = start_block(ctx); define_label(ctx, header); + let state = start_block(ctx); control_frames.push(ControlFrame::new( ControlFrameKind::Loop { header }, @@ -275,7 +277,12 @@ pub fn translate( define_label(ctx, if_not); } } - 
Operator::I32Eq => relop_eq_i32(ctx), + Operator::I32Eq => i32_eq(ctx), + Operator::I32Ne => i32_neq(ctx), + Operator::I32LtS => i32_lt(ctx), + Operator::I32LeS => i32_le(ctx), + Operator::I32GtS => i32_gt(ctx), + Operator::I32GeS => i32_ge(ctx), Operator::I32Add => i32_add(ctx), Operator::I32Sub => i32_sub(ctx), Operator::I32And => i32_and(ctx), diff --git a/src/tests.rs b/src/tests.rs index 2802a5b88d..ccb6a79557 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -374,6 +374,85 @@ fn spec_loop() { unsafe { translated.execute_func::<(), ()>(0, ()) } } +quickcheck! { + fn spec_fac(n: i32) -> bool { + const CODE: &str = r#" + (module + (func (param i32) (result i32) + (local i32) + (set_local 1 (call $fac-iter (get_local 0))) + (call $assert-eq (get_local 1) (call $fac-opt (get_local 0))) + (get_local 1) + ) + + (func $assert-eq (param i32) (param i32) + (if (i32.ne (get_local 0) (get_local 1)) + (unreachable) + ) + ) + + ;; Iterative factorial + (func $fac-iter (param i32) (result i32) + (local i32 i32) + (set_local 1 (get_local 0)) + (set_local 2 (i32.const 1)) + (block + (loop + (if + (i32.lt_s (get_local 1) (i32.const 2)) + (then (br 2)) + (else + (set_local 2 (i32.mul (get_local 1) (get_local 2))) + (set_local 1 (i32.sub (get_local 1) (i32.const 1))) + ) + ) + (br 0) + ) + ) + (get_local 2) + ) + + ;; Optimized factorial. + (func $fac-opt (param i32) (result i32) + (local i32) + (set_local 1 (i32.const 1)) + (block + (br_if 0 (i32.lt_s (get_local 0) (i32.const 2))) + (loop + (set_local 1 (i32.mul (get_local 1) (get_local 0))) + (set_local 0 (i32.add (get_local 0) (i32.const -1))) + (br_if 0 (i32.gt_s (get_local 0) (i32.const 1))) + ) + ) + (get_local 1) + ) + )"#; + + fn fac(mut n: i32) -> i32 { + let mut a = 1i32; + + while n > 1 { + a = a.wrapping_mul(n); + n -= 1; + } + + a + } + + lazy_static! 
{ + static ref TRANSLATED: TranslatedModule = { + let out = translate_wat(CODE); + out.disassemble(); + out + }; + } + + unsafe { + TRANSLATED.execute_func::<(i32,), i32>(0, (n,)) == fac(n) + } + } +} + // Tests that br_if keeps values in the case if the branch // hasn't been taken. #[test] @@ -471,7 +550,12 @@ fn fib() { for x in 0..30 { unsafe { - assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x), "Failed for x={}", x); + assert_eq!( + translated.execute_func::<_, u32>(0, (x,)), + fib(x), + "Failed for x={}", + x + ); } } }