diff --git a/README.md b/README.md index f4394db1f0..c35fc40e67 100644 --- a/README.md +++ b/README.md @@ -127,37 +127,32 @@ fib: ret ``` -Whereas Lightbeam produces code with far fewer memory accesses than both (and fewer blocks than FireFox's output): +Whereas Lightbeam produces smaller code with far fewer memory accesses than both (and fewer blocks than Firefox's output): ```asm fib: - xor eax, eax cmp esi, 2 - setb al - mov ecx, 1 - test eax, eax - jne .Lreturn + mov eax, 1 + jb .Lreturn mov eax, 1 .Lloop: mov rcx, rsi add ecx, 0xffffffff push rsi push rax + push rax mov rsi, rcx - call 0 - add eax, dword ptr [rsp] - mov rcx, qword ptr [rsp + 8] + call fib + add eax, dword ptr [rsp + 8] + mov rcx, qword ptr [rsp + 0x10] add ecx, 0xfffffffe - xor edx, edx cmp ecx, 1 - seta dl mov rsi, rcx - add rsp, 0x10 - test edx, edx - jne .Lloop - mov rcx, rax + pop rcx + pop rcx + pop rcx + ja .Lloop .Lreturn: - mov rax, rcx ret ``` diff --git a/src/backend.rs b/src/backend.rs index 7376f2459e..bfa71f33c8 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -2362,6 +2362,38 @@ impl<'this, M: ModuleContext> Context<'this, M> { } } + pub fn serialize_block_args( + &mut self, + cc: &BlockCallingConvention, + other_to_drop: Option<RangeInclusive<u32>>, + ) -> BlockCallingConvention { + self.do_pass_block_args(cc); + + let mut out_args = cc.arguments.clone(); + + out_args.reverse(); + + if let Some(to_drop) = other_to_drop { + for _ in to_drop { + let val = self.pop(); + // TODO: We can use stack slots for values already on the stack but we + // don't refcount stack slots right now + let loc = CCLoc::Reg(self.into_temp_reg(None, val)); + + out_args.push(loc); + } + } + + out_args.reverse(); + + self.set_stack_depth(cc.stack_depth); + + BlockCallingConvention { + stack_depth: cc.stack_depth, + arguments: out_args, + } + } + /// Puts all stack values into "real" locations so that they can i.e. 
be set to different /// values on different iterations of a loop pub fn serialize_args(&mut self, count: u32) -> BlockCallingConvention { @@ -3670,6 +3702,58 @@ impl<'this, M: ModuleContext> Context<'this, M> { self.push(out_val); } + pub fn i32_reinterpret_from_f32(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_f32().unwrap().bits().into()) + } + val => val, + }; + + self.push(out); + } + + pub fn i64_reinterpret_from_f64(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_f64().unwrap().bits().into()) + } + val => val, + }; + + self.push(out); + } + + pub fn f32_reinterpret_from_i32(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(wasmparser::Ieee32(imm.as_i32().unwrap() as _).into()) + } + val => val, + }; + + self.push(out); + } + + pub fn f64_reinterpret_from_i64(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(wasmparser::Ieee64(imm.as_i64().unwrap() as _).into()) + } + val => val, + }; + + self.push(out); + } + unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands @@ -4970,4 +5054,3 @@ impl IntoLabel for (LabelValue, LabelValue) { Box::new(const_values(self.0, self.1)) } } - diff --git a/src/function_body.rs b/src/function_body.rs index 56085b9d98..e2fc3cb4d7 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -409,7 +409,9 @@ where if block.calling_convention.is_some() { assert!(cc.is_none(), "Can't pass different params to different elements of `br_table` yet"); - cc = block.calling_convention.clone(); + cc = block.calling_convention + .clone() + .map(|cc| (cc, target.to_drop.clone())); } if let Some(max) = max_num_callers { @@ 
-419,11 +421,12 @@ where max_params = max_params.max(block.params); } - if let Some(Left(cc)) = &cc { - ctx.pass_block_args(cc); - } - - let cc = cc.unwrap_or_else(|| + let cc = cc.map(|(cc, to_drop)| { + match cc { + Left(cc) => Left(ctx.serialize_block_args(&cc, to_drop)), + Right(cc) => Right(cc), + } + }).unwrap_or_else(|| if max_num_callers.map(|callers| callers <= 1).unwrap_or(false) { Right(ctx.virtual_calling_convention()) } else { @@ -547,11 +550,10 @@ where Operator::Drop(range) => ctx.drop(range), Operator::Const(val) => ctx.const_(val), Operator::I32WrapFromI64 => {} - // All reinterpret operators are no-ops - we do the conversion at the point of usage. - Operator::I32ReinterpretFromF32 => {} - Operator::I64ReinterpretFromF64 => {} - Operator::F32ReinterpretFromI32 => {} - Operator::F64ReinterpretFromI64 => {} + Operator::I32ReinterpretFromF32 => ctx.i32_reinterpret_from_f32(), + Operator::I64ReinterpretFromF64 => ctx.i64_reinterpret_from_f64(), + Operator::F32ReinterpretFromI32 => ctx.f32_reinterpret_from_i32(), + Operator::F64ReinterpretFromI64 => ctx.f64_reinterpret_from_i64(), Operator::ITruncFromF { input_ty: Size::_32, output_ty: sint::I32, diff --git a/src/microwasm.rs b/src/microwasm.rs index 80b8ea2c1f..aafa66725e 100644 --- a/src/microwasm.rs +++ b/src/microwasm.rs @@ -2098,4 +2098,3 @@ where })) } } -