Integer division

Jef
2019-02-28 18:09:22 +01:00
parent 677d54e46d
commit 71662af0fa
2 changed files with 276 additions and 54 deletions

View File

@@ -1570,7 +1570,7 @@ impl<M: ModuleContext> Context<'_, M> {
&mut self,
targets: I,
default: Option<BrTarget<Label>>,
mut pass_args: impl FnOnce(&mut Self),
pass_args: impl FnOnce(&mut Self),
) where
I: IntoIterator<Item = BrTarget<Label>>,
I::IntoIter: ExactSizeIterator,
@@ -2145,7 +2145,272 @@ impl<M: ModuleContext> Context<'_, M> {
I64
);
// `i64_mul` needs to be separate because the immediate form of the instruction
/// Returned divisor is guaranteed not to be `RAX`
// TODO: With a proper SSA-like "Value" system we could do this way better (we wouldn't have
// to move `RAX` back afterwards).
fn i32_full_div(
&mut self,
divisor: ValueLocation,
quotient: ValueLocation,
do_div: impl FnOnce(&mut Self, ValueLocation),
) -> (ValueLocation, ValueLocation, Option<GPR>) {
let divisor = if ValueLocation::Reg(RAX) == divisor {
let new_reg = self.block_state.regs.take(I32);
self.copy_value(&divisor, &mut ValueLocation::Reg(new_reg));
self.block_state.regs.release(RAX);
ValueLocation::Reg(new_reg)
} else if let ValueLocation::Stack(_) = divisor {
divisor
} else {
ValueLocation::Reg(self.into_temp_reg(I32, divisor))
};
self.free_value(quotient);
let should_save_rax = !self.block_state.regs.is_free(RAX);
if let ValueLocation::Reg(r) = quotient {
self.block_state.regs.mark_used(r);
}
let saved_rax = if should_save_rax {
let new_reg = self.block_state.regs.take(I32);
dynasm!(self.asm
; mov Rq(new_reg.rq().unwrap()), rax
);
Some(new_reg)
} else {
None
};
do_div(self, divisor);
(divisor, ValueLocation::Reg(RAX), saved_rax)
}
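
A side note on why `RAX` gets this special treatment: x86's `div` and `idiv` take their dividend implicitly in the `EDX:EAX` pair and leave the quotient in `EAX` and the remainder in `EDX`, so the divisor has to live somewhere else and any live value in `RAX` must be stashed first. A minimal Rust sketch of the unsigned `div r/m32` behaviour — illustrative only, hypothetical names, not code from this commit:

// Illustrative only: models what `div r/m32` does with the EDX:EAX pair.
// Returns (quotient_in_eax, remainder_in_edx); `None` marks the cases where
// the CPU raises #DE: a zero divisor, or a quotient too large for EAX.
fn emulate_div_r32(edx: u32, eax: u32, divisor: u32) -> Option<(u32, u32)> {
    if divisor == 0 {
        return None; // #DE: divide by zero
    }
    let dividend = ((edx as u64) << 32) | eax as u64;
    let quotient = dividend / divisor as u64;
    if quotient > u32::MAX as u64 {
        return None; // #DE: quotient does not fit in EAX
    }
    Some((quotient as u32, (dividend % divisor as u64) as u32))
}

For a plain Wasm `i32.div_u` the upper half of the dividend is simply zero, i.e. `emulate_div_r32(0, x, d)` is just `x / d` with the remainder in the second slot.
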
fn i32_full_div_u(
&mut self,
divisor: ValueLocation,
quotient: ValueLocation,
) -> (ValueLocation, ValueLocation, Option<GPR>) {
self.i32_full_div(divisor, quotient, |this, divisor| match divisor {
ValueLocation::Stack(offset) => {
let offset = this.adjusted_offset(offset);
dynasm!(this.asm
; div [rsp + offset]
);
}
ValueLocation::Reg(r) => {
dynasm!(this.asm
; div Rq(r.rq().unwrap())
);
}
ValueLocation::Immediate(_) => unreachable!(),
})
}
fn i32_full_div_s(
&mut self,
divisor: ValueLocation,
quotient: ValueLocation,
) -> (ValueLocation, ValueLocation, Option<GPR>) {
self.i32_full_div(divisor, quotient, |this, divisor| match divisor {
ValueLocation::Stack(offset) => {
let offset = this.adjusted_offset(offset);
dynasm!(this.asm
; idiv [rsp + offset]
);
}
ValueLocation::Reg(r) => {
dynasm!(this.asm
; idiv Rq(r.rq().unwrap())
);
}
ValueLocation::Immediate(_) => unreachable!(),
})
}
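
A related detail of the underlying instructions: `div` expects `EDX` to be zeroed beforehand (`xor edx, edx`), `idiv` expects `EDX:EAX` to hold the sign-extension of the dividend (`cdq`), and `idiv` additionally faults on `i32::MIN / -1`, whose quotient does not fit in `EAX`. A tiny sketch of the two dividend shapes — illustrative only, hypothetical names:

// Illustrative only: how the implicit 64-bit dividend is formed.
fn dividend_for_div(eax: u32) -> u64 {
    // `xor edx, edx` before `div`: the upper 32 bits are zero.
    eax as u64
}

fn dividend_for_idiv(eax: i32) -> i64 {
    // `cdq` before `idiv`: EDX becomes the sign-extension of EAX.
    eax as i64
}
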
// TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
// emitting Wasm.
pub fn i32_div_u(&mut self) {
let divisor = self.pop();
let quotient = self.pop();
if let (Some(quotient), Some(divisor)) = (quotient.imm_i32(), divisor.imm_i32()) {
if divisor == 0 {
self.trap();
self.push(ValueLocation::Immediate(0u32.into()));
} else {
self.push(ValueLocation::Immediate(
u32::wrapping_div(quotient as _, divisor as _).into(),
));
}
return;
}
let (div, rem, saved_rax) = self.i32_full_div_u(divisor, quotient);
self.free_value(rem);
if let Some(saved) = saved_rax {
self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX));
self.block_state.regs.release(saved);
self.block_state.regs.mark_used(RAX);
}
self.push(div);
}
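
On the TODO above `i32_div_u` about turning division by a constant into a multiply: the standard trick multiplies by a precomputed reciprocal and shifts. A self-contained sketch for the specific divisor 3 — illustrative only, not tied to this codebase:

// Illustrative only: strength-reducing `x / 3` for u32 into multiply + shift.
// 0xAAAA_AAAB == ceil(2^33 / 3), so x * m / 2^33 == x/3 + x/(3 * 2^33); the
// extra term is below 1/6 for any u32 x, too small to change the floor.
fn div3(x: u32) -> u32 {
    ((x as u64 * 0xAAAA_AAAB) >> 33) as u32
}

fn main() {
    for x in [0u32, 1, 2, 3, 4, 1_000_000_007, u32::MAX - 1, u32::MAX] {
        assert_eq!(div3(x), x / 3);
    }
}

Deriving the (magic, shift) pair for an arbitrary divisor is the Granlund–Montgomery "magic number" construction, which is presumably what the TODO has in mind.
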
pub fn i32_rem_u(&mut self) {
let divisor = self.pop();
let quotient = self.pop();
if let (Some(quotient), Some(divisor)) = (quotient.imm_i32(), divisor.imm_i32()) {
if divisor == 0 {
self.trap();
self.push(ValueLocation::Immediate(0u32.into()));
} else {
self.push(ValueLocation::Immediate(
(quotient as u32 % divisor as u32).into(),
));
}
return;
}
let (div, rem, saved_rax) = self.i32_full_div_u(divisor, quotient);
self.free_value(div);
let rem = if let Some(saved) = saved_rax {
let new_gpr = self.block_state.regs.take(I32);
self.copy_value(&ValueLocation::Reg(RAX), &mut ValueLocation::Reg(new_gpr));
self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX));
self.block_state.regs.release(saved);
ValueLocation::Reg(new_gpr)
} else {
rem
};
self.push(rem);
}
pub fn i32_rem_s(&mut self) {
let divisor = self.pop();
let quotient = self.pop();
if let (Some(quotient), Some(divisor)) = (quotient.imm_i32(), divisor.imm_i32()) {
if divisor == 0 {
self.trap();
self.push(ValueLocation::Immediate(0u32.into()));
} else {
self.push(ValueLocation::Immediate((quotient % divisor).into()));
}
return;
}
let (div, rem, saved_rax) = self.i32_full_div_s(divisor, quotient);
self.free_value(div);
let rem = if let Some(saved) = saved_rax {
let new_gpr = self.block_state.regs.take(I32);
self.copy_value(&ValueLocation::Reg(RAX), &mut ValueLocation::Reg(new_gpr));
self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX));
self.block_state.regs.release(saved);
ValueLocation::Reg(new_gpr)
} else {
rem
};
self.push(rem);
}
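
One edge case worth keeping in mind for the constant-folded `rem_s` path above: Wasm's `i32.rem_s` traps only on a zero divisor and defines `i32::MIN rem -1` as 0, whereas Rust's plain `%` treats `i32::MIN % -1` as an overflow (a panic in debug builds); `wrapping_rem` produces the Wasm result. A short sketch of the spec behaviour — illustrative only, hypothetical name:

// Illustrative only: Wasm `i32.rem_s` semantics; `None` marks the trap case.
fn wasm_i32_rem_s(lhs: i32, rhs: i32) -> Option<i32> {
    if rhs == 0 {
        None // divide by zero traps
    } else {
        Some(lhs.wrapping_rem(rhs)) // i32::MIN rem -1 == 0, no trap
    }
}
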
// TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
// emitting Wasm.
pub fn i32_div_s(&mut self) {
let divisor = self.pop();
let quotient = self.pop();
if let (Some(quotient), Some(divisor)) = (quotient.imm_i32(), divisor.imm_i32()) {
if divisor == 0 {
self.trap();
self.push(ValueLocation::Immediate(0u32.into()));
} else {
self.push(ValueLocation::Immediate(
i32::wrapping_div(quotient, divisor).into(),
));
}
return;
}
let (div, rem, saved_rax) = self.i32_full_div_s(divisor, quotient);
self.free_value(rem);
if let Some(saved) = saved_rax {
self.copy_value(&ValueLocation::Reg(saved), &mut ValueLocation::Reg(RAX));
self.block_state.regs.release(saved);
self.block_state.regs.mark_used(RAX);
}
self.push(div);
}
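
Similarly for the constant-folded `div_s` path above: Wasm's `i32.div_s` traps both on a zero divisor and on the single overflowing case `i32::MIN / -1`, while `i32::wrapping_div` maps that case to `i32::MIN`. A short sketch of the spec behaviour — illustrative only, hypothetical name:

// Illustrative only: Wasm `i32.div_s` semantics; `None` marks the trap cases.
fn wasm_i32_div_s(lhs: i32, rhs: i32) -> Option<i32> {
    if rhs == 0 || (lhs == i32::MIN && rhs == -1) {
        None // divide by zero, or quotient not representable
    } else {
        Some(lhs / rhs)
    }
}
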
// `i32_mul` needs to be separate because the immediate form of the instruction
// has a different syntax to the immediate form of the other instructions.
pub fn i32_mul(&mut self) {
let op0 = self.pop();
let op1 = self.pop();
if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() {
self.push(ValueLocation::Immediate(
i32::wrapping_mul(i1.as_i32().unwrap(), i0.as_i32().unwrap()).into(),
));
return;
}
}
let (op1, op0) = match op1 {
ValueLocation::Reg(_) => (self.into_temp_reg(I32, op1), op0),
_ => {
if op0.immediate().is_some() {
(self.into_temp_reg(I32, op1), op0)
} else {
(self.into_temp_reg(I32, op0), op1)
}
}
};
match op0 {
ValueLocation::Reg(reg) => {
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), Rd(reg.rq().unwrap())
);
}
ValueLocation::Stack(offset) => {
let offset = self.adjusted_offset(offset);
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), [rsp + offset]
);
}
ValueLocation::Immediate(i) => {
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), Rd(op1.rq().unwrap()), i.as_i32().unwrap()
);
}
}
self.push(ValueLocation::Reg(op1));
self.free_value(op0);
}
// `i64_mul` needs to be separate because the immediate form of the instruction
// has a different syntax to the immediate form of the other instructions.
pub fn i64_mul(&mut self) {
let op0 = self.pop();
@@ -2201,55 +2466,6 @@ impl<M: ModuleContext> Context<'_, M> {
self.free_value(op0);
}
// `i32_mul` needs to be separate because the immediate form of the instruction
// has a different syntax to the immediate form of the other instructions.
pub fn i32_mul(&mut self) {
let op0 = self.pop();
let op1 = self.pop();
if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() {
self.push(ValueLocation::Immediate(
i32::wrapping_mul(i1.as_i32().unwrap(), i0.as_i32().unwrap()).into(),
));
return;
}
}
let (op1, op0) = match op1 {
ValueLocation::Reg(_) => (self.into_temp_reg(I32, op1), op0),
_ => {
if op0.immediate().is_some() {
(self.into_temp_reg(I32, op1), op0)
} else {
(self.into_temp_reg(I32, op0), op1)
}
}
};
match op0 {
ValueLocation::Reg(reg) => {
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), Rd(reg.rq().unwrap())
);
}
ValueLocation::Stack(offset) => {
let offset = self.adjusted_offset(offset);
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), [rsp + offset]
);
}
ValueLocation::Immediate(i) => {
dynasm!(self.asm
; imul Rd(op1.rq().unwrap()), Rd(op1.rq().unwrap()), i.as_i32().unwrap()
);
}
}
self.push(ValueLocation::Reg(op1));
self.free_value(op0);
}
pub fn select(&mut self) {
let cond = self.pop();
let else_ = self.pop();

View File

@@ -36,7 +36,7 @@ where
{
let ty = session.module_context.func_type(func_idx);
if true {
if false {
let mut microwasm = vec![];
let microwasm_conv = MicrowasmConv::new(
@@ -137,7 +137,7 @@ where
let block = entry.get_mut();
// TODO: Is it possible with arbitrary CFGs that a block will have _only_ backwards callers?
// Certainly for Wasm that is currently impossible.
// Certainly for Microwasm generated from Wasm that is currently impossible.
if block.actual_num_callers == 0 {
loop {
let done = match body.peek() {
@@ -245,8 +245,10 @@ where
}
}
Operator::BrIf { then, else_ } => {
// TODO: We should add the block to the hashmap if we don't have it already
let (then_block, else_block) = blocks.pair_mut(&then, &else_);
// TODO: If actual_num_callers == num_callers then we can remove this block from the hashmap.
// This frees memory and acts as a kind of verification that `num_callers` is set
// correctly. It doesn't help for loops and block ends generated from Wasm.
then_block.actual_num_callers += 1;
else_block.actual_num_callers += 1;
@@ -379,6 +381,10 @@ where
Operator::Or(Size::_32) => ctx.i32_or(),
Operator::Xor(Size::_32) => ctx.i32_xor(),
Operator::Mul(I32) => ctx.i32_mul(),
Operator::Div(SU32) => ctx.i32_div_u(),
Operator::Div(SI32) => ctx.i32_div_s(),
Operator::Rem(sint::I32) => ctx.i32_rem_s(),
Operator::Rem(sint::U32) => ctx.i32_rem_u(),
Operator::Shl(Size::_32) => ctx.i32_shl(),
Operator::Shr(sint::I32) => ctx.i32_shr_s(),
Operator::Shr(sint::U32) => ctx.i32_shr_u(),