Add 64-bit operations

This commit is contained in:
Jef
2019-01-07 19:17:29 +01:00
parent 5eee15ab02
commit fcd18746a7
3 changed files with 451 additions and 122 deletions

View File

@@ -46,7 +46,7 @@ const NUM_GPRS: u8 = 16;
impl GPRs { impl GPRs {
fn take(&mut self) -> GPR { fn take(&mut self) -> GPR {
let lz = self.bits.trailing_zeros(); let lz = self.bits.trailing_zeros();
assert!(lz < 16, "ran out of free GPRs"); debug_assert!(lz < 16, "ran out of free GPRs");
let gpr = lz as GPR; let gpr = lz as GPR;
self.mark_used(gpr); self.mark_used(gpr);
gpr gpr
@@ -57,7 +57,7 @@ impl GPRs {
} }
fn release(&mut self, gpr: GPR) { fn release(&mut self, gpr: GPR) {
assert!(!self.is_free(gpr), "released register was already free",); debug_assert!(!self.is_free(gpr), "released register was already free",);
self.bits |= 1 << gpr; self.bits |= 1 << gpr;
} }
@@ -129,18 +129,13 @@ enum ValueLocation {
/// before reading (as RSP may have been changed by `push`/`pop`). /// before reading (as RSP may have been changed by `push`/`pop`).
Stack(i32), Stack(i32),
/// Value is a literal (TODO: Support more than just `i32`) /// Value is a literal (TODO: Support more than just `i32`)
Immediate(i32), Immediate(i64),
} }
// TODO: This assumes only system-v calling convention. // TODO: This assumes only system-v calling convention.
// In system-v calling convention the first 6 arguments are passed via registers. // In system-v calling convention the first 6 arguments are passed via registers.
// All rest arguments are passed on the stack. // All rest arguments are passed on the stack.
const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
// RAX is reserved for return values. In the future we want a system to allow
// use of specific registers by saving/restoring them. This would allow using
// RAX as a scratch register when we're not calling a function, and would also
// allow us to call instructions that require specific registers.
//
// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI // List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11]; const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11];
@@ -166,7 +161,7 @@ impl CodeGenSession {
{ {
let func_start = &mut self.func_starts[func_idx as usize]; let func_start = &mut self.func_starts[func_idx as usize];
// At this point we now the exact start address of this function. Save it // At this point we know the exact start address of this function. Save it
// and define dynamic label at this location. // and define dynamic label at this location.
func_start.0 = Some(self.assembler.offset()); func_start.0 = Some(self.assembler.offset());
self.assembler.dynamic_label(func_start.1); self.assembler.dynamic_label(func_start.1);
@@ -217,11 +212,11 @@ impl TranslatedCodeSection {
enum Value { enum Value {
Local(u32), Local(u32),
Temp(GPR), Temp(GPR),
Immediate(i32), Immediate(i64),
} }
impl Value { impl Value {
fn immediate(&self) -> Option<i32> { fn immediate(&self) -> Option<i64> {
match *self { match *self {
Value::Immediate(i) => Some(i), Value::Immediate(i) => Some(i),
_ => None, _ => None,
@@ -241,7 +236,7 @@ impl Value {
enum StackValue { enum StackValue {
Local(u32), Local(u32),
Temp(GPR), Temp(GPR),
Immediate(i32), Immediate(i64),
Pop, Pop,
} }
@@ -480,7 +475,7 @@ pub fn reset_block(ctx: &mut Context, parent_block_state: BlockState) {
pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) { pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
// TODO: This should currently never be called, but is important for if we want to // TODO: This should currently never be called, but is important for if we want to
// have a more complex stack spilling scheme. // have a more complex stack spilling scheme.
assert_eq!( debug_assert_eq!(
ctx.block_state.depth, parent_block_state.depth, ctx.block_state.depth, parent_block_state.depth,
"Imbalanced pushes and pops" "Imbalanced pushes and pops"
); );
@@ -499,15 +494,6 @@ pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
} }
} }
fn push_return_value(ctx: &mut Context, arity: u32) {
if arity == 0 {
return;
}
assert_eq!(arity, 1);
ctx.block_state.regs.mark_used(RAX);
ctx.block_state.stack.push(StackValue::Temp(RAX));
}
fn restore_locals(ctx: &mut Context) { fn restore_locals(ctx: &mut Context) {
for (src, dst) in ctx for (src, dst) in ctx
.block_state .block_state
@@ -521,7 +507,7 @@ fn restore_locals(ctx: &mut Context) {
} }
} }
fn push_i32(ctx: &mut Context, value: Value) { fn push(ctx: &mut Context, value: Value) {
let stack_loc = match value { let stack_loc = match value {
Value::Local(loc) => StackValue::Local(loc), Value::Local(loc) => StackValue::Local(loc),
Value::Immediate(i) => StackValue::Immediate(i), Value::Immediate(i) => StackValue::Immediate(i),
@@ -543,7 +529,7 @@ fn push_i32(ctx: &mut Context, value: Value) {
ctx.block_state.stack.push(stack_loc); ctx.block_state.stack.push(stack_loc);
} }
fn pop_i32(ctx: &mut Context) -> Value { fn pop(ctx: &mut Context) -> Value {
match ctx.block_state.stack.pop().expect("Stack is empty") { match ctx.block_state.stack.pop().expect("Stack is empty") {
StackValue::Local(loc) => Value::Local(loc), StackValue::Local(loc) => Value::Local(loc),
StackValue::Immediate(i) => Value::Immediate(i), StackValue::Immediate(i) => Value::Immediate(i),
@@ -607,7 +593,7 @@ pub fn drop(ctx: &mut Context) {
} }
} }
fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { fn pop_into(ctx: &mut Context, dst: ValueLocation) {
let val = ctx.block_state.stack.pop().expect("Stack is empty"); let val = ctx.block_state.stack.pop().expect("Stack is empty");
put_stack_val_into(ctx, val, dst); put_stack_val_into(ctx, val, dst);
} }
@@ -632,9 +618,7 @@ fn into_reg(ctx: &mut Context, val: Value) -> GPR {
} }
ValueLocation::Immediate(i) => { ValueLocation::Immediate(i) => {
let scratch = ctx.block_state.regs.take_scratch_gpr(); let scratch = ctx.block_state.regs.take_scratch_gpr();
dynasm!(ctx.asm immediate_to_reg(ctx, scratch, i);
; mov Rq(scratch), i
);
scratch scratch
} }
ValueLocation::Reg(reg) => reg, ValueLocation::Reg(reg) => reg,
@@ -670,9 +654,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
Value::Immediate(i) => { Value::Immediate(i) => {
let scratch = ctx.block_state.regs.take_scratch_gpr(); let scratch = ctx.block_state.regs.take_scratch_gpr();
dynasm!(ctx.asm immediate_to_reg(ctx, scratch, i);
; mov Rq(scratch), i
);
scratch scratch
} }
@@ -680,15 +662,15 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
} }
} }
macro_rules! commutative_binop { macro_rules! commutative_binop_i32 {
($name:ident, $instr:ident, $const_fallback:expr) => { ($name:ident, $instr:ident, $const_fallback:expr) => {
pub fn $name(ctx: &mut Context) { pub fn $name(ctx: &mut Context) {
let op0 = pop_i32(ctx); let op0 = pop(ctx);
let op1 = pop_i32(ctx); let op1 = pop(ctx);
if let Some(i1) = op1.immediate() { if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() { if let Some(i0) = op0.immediate() {
ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1, i0))); ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1 as i32, i0 as i32) as _));
return; return;
} }
} }
@@ -716,7 +698,7 @@ macro_rules! commutative_binop {
} }
ValueLocation::Immediate(i) => { ValueLocation::Immediate(i) => {
dynasm!(ctx.asm dynasm!(ctx.asm
; $instr Rd(op1), i ; $instr Rd(op1), i as i32
); );
} }
} }
@@ -727,22 +709,78 @@ macro_rules! commutative_binop {
} }
} }
commutative_binop!(i32_add, add, i32::wrapping_add); macro_rules! commutative_binop_i64 {
commutative_binop!(i32_and, and, |a, b| a & b); ($name:ident, $instr:ident, $const_fallback:expr) => {
commutative_binop!(i32_or, or, |a, b| a | b); pub fn $name(ctx: &mut Context) {
commutative_binop!(i32_xor, xor, |a, b| a ^ b); let op0 = pop(ctx);
let op1 = pop(ctx);
if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() {
ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1, i0)));
return;
}
}
let (op1, op0) = match op1 {
Value::Temp(reg) => (reg, op0),
_ => if op0.immediate().is_some() {
(into_temp_reg(ctx, op1), op0)
} else {
(into_temp_reg(ctx, op0), op1)
}
};
match op0.location(&ctx.block_state.locals) {
ValueLocation::Reg(reg) => {
dynasm!(ctx.asm
; $instr Rq(op1), Rq(reg)
);
}
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
; $instr Rq(op1), [rsp + offset]
);
}
ValueLocation::Immediate(i) => {
if (i as u64) <= u32::max_value() as u64 {
dynasm!(ctx.asm
; $instr Rq(op1), i as i32
);
} else {
unimplemented!(concat!("Unsupported `", stringify!($instr), "` with large 64-bit immediate operand"));
}
}
}
ctx.block_state.stack.push(StackValue::Temp(op1));
free_value(ctx, op0);
}
}
}
// TODO: Use `inc`/`dec` where possible?
commutative_binop_i32!(i32_add, add, |a, b| (a as i32).wrapping_add(b as i32));
commutative_binop_i32!(i32_and, and, |a, b| a & b);
commutative_binop_i32!(i32_or, or, |a, b| a | b);
commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b);
commutative_binop_i64!(i64_add, add, i64::wrapping_add);
commutative_binop_i64!(i64_and, and, |a, b| a & b);
commutative_binop_i64!(i64_or, or, |a, b| a | b);
commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b);
// `i32_mul` needs to be seperate because the immediate form of the instruction // `i32_mul` needs to be seperate because the immediate form of the instruction
// has a different syntax to the immediate form of the other instructions. // has a different syntax to the immediate form of the other instructions.
pub fn i32_mul(ctx: &mut Context) { pub fn i32_mul(ctx: &mut Context) {
let op0 = pop_i32(ctx); let op0 = pop(ctx);
let op1 = pop_i32(ctx); let op1 = pop(ctx);
if let Some(i1) = op1.immediate() { if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() { if let Some(i0) = op0.immediate() {
ctx.block_state ctx.block_state.stack.push(StackValue::Immediate(
.stack i32::wrapping_mul(i1 as i32, i0 as i32) as _,
.push(StackValue::Immediate(i32::wrapping_mul(i1, i0))); ));
return; return;
} }
} }
@@ -772,7 +810,7 @@ pub fn i32_mul(ctx: &mut Context) {
} }
ValueLocation::Immediate(i) => { ValueLocation::Immediate(i) => {
dynasm!(ctx.asm dynasm!(ctx.asm
; imul Rd(op1), Rd(op1), i ; imul Rd(op1), Rd(op1), i as i32
); );
} }
} }
@@ -781,11 +819,109 @@ pub fn i32_mul(ctx: &mut Context) {
free_value(ctx, op0); free_value(ctx, op0);
} }
/// Emits a 64-bit subtraction.
///
/// Pops `op0` (top of the value stack) and then `op1`, and pushes `op1 - op0`.
/// When both operands are immediates the result is folded at compile time and
/// no code is emitted.
///
/// `sub` is not commutative, so we have to handle it differently from the
/// commutative operations: we _must_ use the `op1` temp register as the output.
///
/// # Panics
///
/// Hits `unimplemented!` when `op0` is an immediate that cannot be encoded as
/// a sign-extended 32-bit immediate.
pub fn i64_sub(ctx: &mut Context) {
    let op0 = pop(ctx);
    let op1 = pop(ctx);

    if let Some(i1) = op1.immediate() {
        if let Some(i0) = op0.immediate() {
            // Fold with wrapping arithmetic, like the other constant
            // fallbacks; a plain `-` panics on overflow in debug builds.
            ctx.block_state
                .stack
                .push(StackValue::Immediate(i1.wrapping_sub(i0)));
            return;
        }
    }

    let op1 = into_temp_reg(ctx, op1);
    match op0.location(&ctx.block_state.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; sub Rq(op1), Rq(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; sub Rq(op1), [rsp + offset]
            );
        }
        ValueLocation::Immediate(i) => {
            // With a 64-bit destination the 32-bit immediate is sign-extended,
            // so exactly the values in `i32` range are encodable. Checking
            // against `u32::MAX` would silently turn 0x8000_0000..=0xFFFF_FFFF
            // into negative operands.
            if i >= i32::min_value() as i64 && i <= i32::max_value() as i64 {
                dynasm!(ctx.asm
                    ; sub Rq(op1), i as i32
                );
            } else {
                unimplemented!("Unsupported `sub` with large 64-bit immediate operand");
            }
        }
    }

    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_value(ctx, op0);
}
/// Emits a 64-bit multiplication.
///
/// Pops two operands from the value stack and pushes their product. When both
/// are immediates the product is folded at compile time with wrapping
/// arithmetic and no code is emitted.
///
/// `i64_mul` needs to be separate from the other commutative operations
/// because the immediate form of `imul` has a different syntax to the
/// immediate form of the other instructions.
///
/// # Panics
///
/// Hits `unimplemented!` when the non-register operand is an immediate that
/// cannot be encoded as a sign-extended 32-bit immediate.
pub fn i64_mul(ctx: &mut Context) {
    let op0 = pop(ctx);
    let op1 = pop(ctx);

    if let Some(i1) = op1.immediate() {
        if let Some(i0) = op0.immediate() {
            ctx.block_state
                .stack
                .push(StackValue::Immediate(i64::wrapping_mul(i1, i0)));
            return;
        }
    }

    // Multiplication is commutative, so pick whichever operand is cheapest to
    // use as the destination register: an existing temp if there is one,
    // otherwise keep an immediate on the right-hand side.
    let (op1, op0) = match op1 {
        Value::Temp(reg) => (reg, op0),
        _ => {
            if op0.immediate().is_some() {
                (into_temp_reg(ctx, op1), op0)
            } else {
                (into_temp_reg(ctx, op0), op1)
            }
        }
    };

    match op0.location(&ctx.block_state.locals) {
        ValueLocation::Reg(reg) => {
            dynasm!(ctx.asm
                ; imul Rq(op1), Rq(reg)
            );
        }
        ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            dynasm!(ctx.asm
                ; imul Rq(op1), [rsp + offset]
            );
        }
        ValueLocation::Immediate(i) => {
            // With a 64-bit destination the 32-bit immediate is sign-extended,
            // so exactly the values in `i32` range are encodable. Checking
            // against `u32::MAX` would silently turn 0x8000_0000..=0xFFFF_FFFF
            // into negative multipliers.
            if i >= i32::min_value() as i64 && i <= i32::max_value() as i64 {
                dynasm!(ctx.asm
                    ; imul Rq(op1), Rq(op1), i as i32
                );
            } else {
                unimplemented!("Unsupported `imul` with large 64-bit immediate operand");
            }
        }
    }

    ctx.block_state.stack.push(StackValue::Temp(op1));
    free_value(ctx, op0);
}
// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1` // `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
// temp register as the output) // temp register as the output)
pub fn i32_sub(ctx: &mut Context) { pub fn i32_sub(ctx: &mut Context) {
let op0 = pop_i32(ctx); let op0 = pop(ctx);
let op1 = pop_i32(ctx); let op1 = pop(ctx);
if let Some(i1) = op1.immediate() { if let Some(i1) = op1.immediate() {
if let Some(i0) = op0.immediate() { if let Some(i0) = op0.immediate() {
@@ -809,7 +945,7 @@ pub fn i32_sub(ctx: &mut Context) {
} }
ValueLocation::Immediate(i) => { ValueLocation::Immediate(i) => {
dynasm!(ctx.asm dynasm!(ctx.asm
; sub Rd(op1), i ; sub Rd(op1), i as i32
); );
} }
} }
@@ -819,13 +955,13 @@ pub fn i32_sub(ctx: &mut Context) {
} }
pub fn get_local_i32(ctx: &mut Context, local_idx: u32) { pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
push_i32(ctx, Value::Local(local_idx)); push(ctx, Value::Local(local_idx));
} }
// TODO: We can put locals that were spilled to the stack // TODO: We can put locals that were spilled to the stack
// back into registers here. // back into registers here.
pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
let val = pop_i32(ctx); let val = pop(ctx);
let val_loc = val.location(&ctx.block_state.locals); let val_loc = val.location(&ctx.block_state.locals);
let dst_loc = ctx.block_state.parent_locals.get(local_idx); let dst_loc = ctx.block_state.parent_locals.get(local_idx);
@@ -884,14 +1020,18 @@ fn materialize_local(ctx: &mut Context, local_idx: u32) {
} }
pub fn literal_i32(ctx: &mut Context, imm: i32) { pub fn literal_i32(ctx: &mut Context, imm: i32) {
push_i32(ctx, Value::Immediate(imm)); push(ctx, Value::Immediate(imm as _));
} }
macro_rules! cmp { pub fn literal_i64(ctx: &mut Context, imm: i64) {
push(ctx, Value::Immediate(imm));
}
macro_rules! cmp_i32 {
($name:ident, $instr:ident, $const_fallback:expr) => { ($name:ident, $instr:ident, $const_fallback:expr) => {
pub fn $name(ctx: &mut Context) { pub fn $name(ctx: &mut Context) {
let right = pop_i32(ctx); let right = pop(ctx);
let left = pop_i32(ctx); let left = pop(ctx);
let out = if let Some(i) = left.immediate() { let out = if let Some(i) = left.immediate() {
match right.location(&ctx.block_state.locals) { match right.location(&ctx.block_state.locals) {
@@ -899,8 +1039,8 @@ macro_rules! cmp {
let result = ctx.block_state.regs.take_scratch_gpr(); let result = ctx.block_state.regs.take_scratch_gpr();
let offset = adjusted_offset(ctx, offset); let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm dynasm!(ctx.asm
; xor Rq(result), Rq(result) ; xor Rd(result), Rd(result)
; cmp DWORD [rsp + offset], i ; cmp DWORD [rsp + offset], i as i32
; $instr Rb(result) ; $instr Rb(result)
); );
Value::Temp(result) Value::Temp(result)
@@ -908,8 +1048,78 @@ macro_rules! cmp {
ValueLocation::Reg(rreg) => { ValueLocation::Reg(rreg) => {
let result = ctx.block_state.regs.take_scratch_gpr(); let result = ctx.block_state.regs.take_scratch_gpr();
dynasm!(ctx.asm dynasm!(ctx.asm
; xor Rq(result), Rq(result) ; xor Rd(result), Rd(result)
; cmp Rd(rreg), i ; cmp Rd(rreg), i as i32
; $instr Rb(result)
);
Value::Temp(result)
}
ValueLocation::Immediate(right) => {
Value::Immediate(if $const_fallback(i as i32, right as i32) { 1 } else { 0 })
}
}
} else {
let lreg = into_reg(ctx, left);
let result = ctx.block_state.regs.take_scratch_gpr();
match right.location(&ctx.block_state.locals) {
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
; xor Rd(result), Rd(result)
; cmp Rd(lreg), [rsp + offset]
; $instr Rb(result)
);
}
ValueLocation::Reg(rreg) => {
dynasm!(ctx.asm
; xor Rd(result), Rd(result)
; cmp Rd(lreg), Rd(rreg)
; $instr Rb(result)
);
}
ValueLocation::Immediate(i) => {
dynasm!(ctx.asm
; xor Rd(result), Rd(result)
; cmp Rd(lreg), i as i32
; $instr Rb(result)
);
}
}
Value::Temp(result)
};
push(ctx, out);
free_value(ctx, left);
free_value(ctx, right);
}
}
}
macro_rules! cmp_i64 {
($name:ident, $instr:ident, $const_fallback:expr) => {
pub fn $name(ctx: &mut Context) {
let right = pop(ctx);
let left = pop(ctx);
let out = if let Some(i) = left.immediate() {
match right.location(&ctx.block_state.locals) {
ValueLocation::Stack(offset) => {
let result = ctx.block_state.regs.take_scratch_gpr();
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
; xor Rd(result), Rd(result)
; cmp QWORD [rsp + offset], i as i32
; $instr Rb(result)
);
Value::Temp(result)
}
ValueLocation::Reg(rreg) => {
let result = ctx.block_state.regs.take_scratch_gpr();
dynasm!(ctx.asm
; xor Rd(result), Rd(result)
; cmp Rq(rreg), i as i32
; $instr Rb(result) ; $instr Rb(result)
); );
Value::Temp(result) Value::Temp(result)
@@ -926,53 +1136,69 @@ macro_rules! cmp {
ValueLocation::Stack(offset) => { ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset); let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm dynasm!(ctx.asm
; xor Rq(result), Rq(result) ; xor Rd(result), Rd(result)
; cmp Rd(lreg), [rsp + offset] ; cmp Rq(lreg), [rsp + offset]
; $instr Rb(result) ; $instr Rb(result)
); );
} }
ValueLocation::Reg(rreg) => { ValueLocation::Reg(rreg) => {
dynasm!(ctx.asm dynasm!(ctx.asm
; xor Rq(result), Rq(result) ; xor Rd(result), Rd(result)
; cmp Rd(lreg), Rd(rreg) ; cmp Rq(lreg), Rq(rreg)
; $instr Rb(result) ; $instr Rb(result)
); );
} }
ValueLocation::Immediate(i) => { ValueLocation::Immediate(i) => {
if (i as u64) <= u32::max_value() as u64 {
dynasm!(ctx.asm dynasm!(ctx.asm
; xor Rq(result), Rq(result) ; xor Rd(result), Rd(result)
; cmp Rd(lreg), i ; cmp Rq(lreg), i as i32
; $instr Rb(result) ; $instr Rb(result)
); );
} else {
unimplemented!("Have yet to implement `cmp` with imm64 operand");
}
} }
} }
Value::Temp(result) Value::Temp(result)
}; };
push_i32(ctx, out); push(ctx, out);
free_value(ctx, left); free_value(ctx, left);
free_value(ctx, right); free_value(ctx, right);
} }
} }
} }
cmp!(i32_eq, sete, |a, b| a == b); cmp_i32!(i32_eq, sete, |a, b| a == b);
cmp!(i32_neq, setne, |a, b| a != b); cmp_i32!(i32_neq, setne, |a, b| a != b);
// `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
cmp!(i32_lt_u, setnae, |a, b| (a as u32) < (b as u32)); cmp_i32!(i32_lt_u, setnae, |a, b| (a as u32) < (b as u32));
cmp!(i32_le_u, setbe, |a, b| (a as u32) <= (b as u32)); cmp_i32!(i32_le_u, setbe, |a, b| (a as u32) <= (b as u32));
cmp!(i32_gt_u, seta, |a, b| (a as u32) > (b as u32)); cmp_i32!(i32_gt_u, seta, |a, b| (a as u32) > (b as u32));
cmp!(i32_ge_u, setae, |a, b| (a as u32) >= (b as u32)); cmp_i32!(i32_ge_u, setae, |a, b| (a as u32) >= (b as u32));
cmp!(i32_lt_s, setl, |a, b| a < b); cmp_i32!(i32_lt_s, setl, |a, b| a < b);
cmp!(i32_le_s, setle, |a, b| a <= b); cmp_i32!(i32_le_s, setle, |a, b| a <= b);
cmp!(i32_gt_s, setg, |a, b| a == b); cmp_i32!(i32_gt_s, setg, |a, b| a == b);
cmp!(i32_ge_s, setge, |a, b| a == b); cmp_i32!(i32_ge_s, setge, |a, b| a == b);
// 64-bit comparisons. Each instantiation pairs the `setcc` instruction used
// for the emitted code with a closure used to fold the comparison when both
// operands are constants; the two must implement the same predicate.
cmp_i64!(i64_eq, sete, |a, b| a == b);
cmp_i64!(i64_neq, setne, |a, b| a != b);
// `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
cmp_i64!(i64_lt_u, setnae, |a, b| (a as u64) < (b as u64));
cmp_i64!(i64_le_u, setbe, |a, b| (a as u64) <= (b as u64));
cmp_i64!(i64_gt_u, seta, |a, b| (a as u64) > (b as u64));
cmp_i64!(i64_ge_u, setae, |a, b| (a as u64) >= (b as u64));
cmp_i64!(i64_lt_s, setl, |a, b| a < b);
cmp_i64!(i64_le_s, setle, |a, b| a <= b);
// The constant fallbacks for the signed greater-than forms must match
// `setg`/`setge`, not test for equality.
cmp_i64!(i64_gt_s, setg, |a, b| a > b);
cmp_i64!(i64_ge_s, setge, |a, b| a >= b);
/// Pops i32 predicate and branches to the specified label /// Pops i32 predicate and branches to the specified label
/// if the predicate is equal to zero. /// if the predicate is equal to zero.
pub fn jump_if_false(ctx: &mut Context, label: Label) { pub fn jump_if_false(ctx: &mut Context, label: Label) {
let val = pop_i32(ctx); let val = pop(ctx);
let predicate = into_temp_reg(ctx, val); let predicate = into_temp_reg(ctx, val);
dynasm!(ctx.asm dynasm!(ctx.asm
; test Rd(predicate), Rd(predicate) ; test Rd(predicate), Rd(predicate)
@@ -988,6 +1214,20 @@ pub fn br(ctx: &mut Context, label: Label) {
); );
} }
/// Emits code that loads the constant `val` into the 64-bit register `reg`.
///
/// The encoding is chosen by the value's range:
/// * values that fit in an unsigned 32-bit integer use a 32-bit `mov`, which
///   zero-extends into the upper half of the register;
/// * negative values that fit in a signed 32-bit integer use the 64-bit `mov`
///   with a 32-bit immediate, which is sign-extended;
/// * anything else is only supported when `reg` is `RAX`.
///
/// # Panics
///
/// Hits `unimplemented!` for a constant that needs a full 64-bit immediate
/// when `reg` is not `RAX`.
fn immediate_to_reg(ctx: &mut Context, reg: GPR, val: i64) {
    if (val as u64) <= u32::max_value() as u64 {
        dynasm!(ctx.asm
            ; mov Rd(reg), val as i32
        );
    } else if val >= i32::min_value() as i64 && val < 0 {
        // Small negative constants (e.g. `-1`) are encodable as a
        // sign-extended imm32, so they work for every register.
        dynasm!(ctx.asm
            ; mov Rq(reg), val as i32
        );
    } else if reg == RAX {
        dynasm!(ctx.asm
            ; movabs rax, val
        );
    } else {
        unimplemented!("dynasm doesn't yet support mov r64, imm64");
    }
}
fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) { fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
match (src, dst) { match (src, dst) {
(ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => { (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => {
@@ -1011,8 +1251,14 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
(ValueLocation::Immediate(i), ValueLocation::Stack(out_offset)) => { (ValueLocation::Immediate(i), ValueLocation::Stack(out_offset)) => {
let out_offset = adjusted_offset(ctx, out_offset); let out_offset = adjusted_offset(ctx, out_offset);
dynasm!(ctx.asm dynasm!(ctx.asm
; mov DWORD [rsp + out_offset], i ; mov DWORD [rsp + out_offset], i as i32
); );
if (i as u64) > u32::max_value() as u64 {
let i = (i >> 4) as i32;
dynasm!(ctx.asm
; mov DWORD [rsp + out_offset + 4], i
);
}
} }
(ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => { (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => {
let in_offset = adjusted_offset(ctx, in_offset); let in_offset = adjusted_offset(ctx, in_offset);
@@ -1028,9 +1274,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
} }
} }
(ValueLocation::Immediate(i), ValueLocation::Reg(out_reg)) => { (ValueLocation::Immediate(i), ValueLocation::Reg(out_reg)) => {
dynasm!(ctx.asm immediate_to_reg(ctx, out_reg, i);
; mov Rq(out_reg), i
);
} }
// TODO: Have separate `ReadLocation` and `WriteLocation`? // TODO: Have separate `ReadLocation` and `WriteLocation`?
(_, ValueLocation::Immediate(_)) => panic!("Tried to copy to an immediate value!"), (_, ValueLocation::Immediate(_)) => panic!("Tried to copy to an immediate value!"),
@@ -1121,7 +1365,7 @@ fn free_register(ctx: &mut Context, reg: GPR) {
// don't have to check this at all (i.e. order on the // don't have to check this at all (i.e. order on the
// physical stack and order on the logical stack should // physical stack and order on the logical stack should
// be independent). // be independent).
assert_eq!(to_repush, 0); debug_assert_eq!(to_repush, 0);
dynasm!(ctx.asm dynasm!(ctx.asm
; push Rq(reg) ; push Rq(reg)
); );
@@ -1180,7 +1424,7 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32, return_arity: u32) -> CallC
// `AbsoluteValueLocation` and `RelativeValueLocation`. // `AbsoluteValueLocation` and `RelativeValueLocation`.
let offset = let offset =
stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32; stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32;
pop_i32_into(ctx, ValueLocation::Stack(offset)); pop_into(ctx, ValueLocation::Stack(offset));
} }
} }
@@ -1188,7 +1432,7 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32, return_arity: u32) -> CallC
.iter() .iter()
.rev() .rev()
{ {
pop_i32_into(ctx, ValueLocation::Reg(*reg)); pop_into(ctx, ValueLocation::Reg(*reg));
} }
// We do this before doing `save_volatile`, since otherwise we'll trample the return value // We do this before doing `save_volatile`, since otherwise we'll trample the return value
@@ -1220,9 +1464,18 @@ fn post_call_cleanup(ctx: &mut Context, mut cleanup: CallCleanup) {
} }
} }
/// Pushes the result of a just-completed call onto the value stack.
///
/// Does nothing for `arity == 0`. Otherwise `RAX` is marked as in use and
/// pushed as a temporary. Only a single return value is supported, which is
/// checked in debug builds.
fn push_function_return(ctx: &mut Context, arity: u32) {
    if arity != 0 {
        debug_assert_eq!(arity, 1);

        // Reserve the register before exposing it on the logical stack.
        ctx.block_state.regs.mark_used(RAX);
        let returned = StackValue::Temp(RAX);
        ctx.block_state.stack.push(returned);
    }
}
/// Call a function with the given index /// Call a function with the given index
pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) { pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) {
assert!( debug_assert!(
return_arity == 0 || return_arity == 1, return_arity == 0 || return_arity == 1,
"We don't support multiple return yet" "We don't support multiple return yet"
); );
@@ -1235,7 +1488,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity:
); );
post_call_cleanup(ctx, cleanup); post_call_cleanup(ctx, cleanup);
push_return_value(ctx, return_arity); push_function_return(ctx, return_arity);
} }
#[must_use] #[must_use]

View File

@@ -134,8 +134,7 @@ pub fn translate(
)); ));
// TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we // TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we
// can coelesce multiple `end`s and optimise break-at-end-of-block into noop. We can't do one // can coelesce multiple `end`s and optimise break-at-end-of-block into noop.
// without the other, since the main case we want to optimise is `(block (loop (br 1)))`.
for op in operators { for op in operators {
let op = op?; let op = op?;
@@ -311,6 +310,22 @@ pub fn translate(
Operator::I32Or => i32_or(ctx), Operator::I32Or => i32_or(ctx),
Operator::I32Xor => i32_xor(ctx), Operator::I32Xor => i32_xor(ctx),
Operator::I32Mul => i32_mul(ctx), Operator::I32Mul => i32_mul(ctx),
Operator::I64Eq => i64_eq(ctx),
Operator::I64Ne => i64_neq(ctx),
Operator::I64LtS => i64_lt_s(ctx),
Operator::I64LeS => i64_le_s(ctx),
Operator::I64GtS => i64_gt_s(ctx),
Operator::I64GeS => i64_ge_s(ctx),
Operator::I64LtU => i64_lt_u(ctx),
Operator::I64LeU => i64_le_u(ctx),
Operator::I64GtU => i64_gt_u(ctx),
Operator::I64GeU => i64_ge_u(ctx),
Operator::I64Add => i64_add(ctx),
Operator::I64Sub => i64_sub(ctx),
Operator::I64And => i64_and(ctx),
Operator::I64Or => i64_or(ctx),
Operator::I64Xor => i64_xor(ctx),
Operator::I64Mul => i64_mul(ctx),
Operator::Drop => drop(ctx), Operator::Drop => drop(ctx),
Operator::SetLocal { local_index } => set_local_i32(ctx, local_index), Operator::SetLocal { local_index } => set_local_i32(ctx, local_index),
Operator::GetLocal { local_index } => get_local_i32(ctx, local_index), Operator::GetLocal { local_index } => get_local_i32(ctx, local_index),

View File

@@ -18,7 +18,10 @@ fn empty() {
let _ = translate_wat("(module (func))"); let _ = translate_wat("(module (func))");
} }
macro_rules! binop_test { mod op32 {
use super::{translate_wat, TranslatedModule};
macro_rules! binop_test {
($op:ident, $func:expr) => { ($op:ident, $func:expr) => {
quickcheck! { quickcheck! {
fn $op(a: u32, b: u32) -> bool { fn $op(a: u32, b: u32) -> bool {
@@ -36,22 +39,67 @@ macro_rules! binop_test {
} }
} }
}; };
}
binop_test!(add, u32::wrapping_add);
binop_test!(sub, u32::wrapping_sub);
binop_test!(and, std::ops::BitAnd::bitand);
binop_test!(or, std::ops::BitOr::bitor);
binop_test!(xor, std::ops::BitXor::bitxor);
binop_test!(mul, u32::wrapping_mul);
binop_test!(lt_u, |a, b| if a < b { 1 } else { 0 });
binop_test!(le_u, |a, b| if a <= b { 1 } else { 0 });
binop_test!(gt_u, |a, b| if a > b { 1 } else { 0 });
binop_test!(ge_u, |a, b| if a >= b { 1 } else { 0 });
binop_test!(lt_s, |a, b| if (a as i32) < (b as i32) { 1 } else { 0 });
binop_test!(le_s, |a, b| if (a as i32) <= (b as i32) { 1 } else { 0 });
binop_test!(gt_s, |a, b| if (a as i32) > (b as i32) { 1 } else { 0 });
binop_test!(ge_s, |a, b| if (a as i32) >= (b as i32) { 1 } else { 0 });
} }
binop_test!(add, u32::wrapping_add); mod op64 {
binop_test!(sub, u32::wrapping_sub); use super::{translate_wat, TranslatedModule};
binop_test!(and, std::ops::BitAnd::bitand);
binop_test!(or, std::ops::BitOr::bitor); macro_rules! binop_test {
binop_test!(xor, std::ops::BitXor::bitxor); ($op:ident, $func:expr) => {
binop_test!(mul, u32::wrapping_mul); binop_test!($op, $func, i64);
binop_test!(lt_u, |a, b| if a < b { 1 } else { 0 }); };
binop_test!(le_u, |a, b| if a <= b { 1 } else { 0 }); ($op:ident, $func:expr, $retty:ident) => {
binop_test!(gt_u, |a, b| if a > b { 1 } else { 0 }); quickcheck! {
binop_test!(ge_u, |a, b| if a >= b { 1 } else { 0 }); fn $op(a: u64, b: u64) -> bool {
binop_test!(lt_s, |a, b| if (a as i32) < (b as i32) { 1 } else { 0 }); static CODE: &str = concat!(
binop_test!(le_s, |a, b| if (a as i32) <= (b as i32) { 1 } else { 0 }); "(module (func (param i64) (param i64) (result ",
binop_test!(gt_s, |a, b| if (a as i32) > (b as i32) { 1 } else { 0 }); stringify!($retty),
binop_test!(ge_s, |a, b| if (a as i32) >= (b as i32) { 1 } else { 0 }); ") (i64.",
stringify!($op),
" (get_local 0) (get_local 1))))"
);
lazy_static! {
static ref TRANSLATED: TranslatedModule = translate_wat(CODE);
}
unsafe { TRANSLATED.execute_func::<(u64, u64), u64>(0, (a, b)) == $func(a, b) }
}
}
};
}
binop_test!(add, u64::wrapping_add);
binop_test!(sub, u64::wrapping_sub);
binop_test!(and, std::ops::BitAnd::bitand);
binop_test!(or, std::ops::BitOr::bitor);
binop_test!(xor, std::ops::BitXor::bitxor);
binop_test!(mul, u64::wrapping_mul);
binop_test!(lt_u, |a, b| if a < b { 1 } else { 0 }, i32);
binop_test!(le_u, |a, b| if a <= b { 1 } else { 0 }, i32);
binop_test!(gt_u, |a, b| if a > b { 1 } else { 0 }, i32);
binop_test!(ge_u, |a, b| if a >= b { 1 } else { 0 }, i32);
binop_test!(lt_s, |a, b| if (a as i64) < (b as i64) { 1 } else { 0 }, i32);
binop_test!(le_s, |a, b| if (a as i64) <= (b as i64) { 1 } else { 0 }, i32);
binop_test!(gt_s, |a, b| if (a as i64) > (b as i64) { 1 } else { 0 }, i32);
binop_test!(ge_s, |a, b| if (a as i64) >= (b as i64) { 1 } else { 0 }, i32);
}
quickcheck! { quickcheck! {
fn relop_eq(a: u32, b: u32) -> bool{ fn relop_eq(a: u32, b: u32) -> bool{
@@ -571,16 +619,29 @@ fn fib() {
} }
#[bench] #[bench]
fn bench_compile(b: &mut test::Bencher) { fn bench_fibonacci_compile(b: &mut test::Bencher) {
let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); let wasm = wabt::wat2wasm(FIBONACCI).unwrap();
b.iter(|| test::black_box(translate(&wasm).unwrap())); b.iter(|| test::black_box(translate(&wasm).unwrap()));
} }
#[bench] #[bench]
fn bench_run(b: &mut test::Bencher) { fn bench_fibonacci_run(b: &mut test::Bencher) {
let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); let wasm = wabt::wat2wasm(FIBONACCI).unwrap();
let module = translate(&wasm).unwrap(); let module = translate(&wasm).unwrap();
b.iter(|| unsafe { module.execute_func::<_, u32>(0, (20,)) }); b.iter(|| unsafe { module.execute_func::<_, u32>(0, (20,)) });
} }
/// Benchmarks a plain Rust recursive Fibonacci for `n = 20`.
// NOTE(review): presumably the native reference point for the
// `bench_fibonacci_*` benchmarks — confirm it matches the wasm `FIBONACCI`.
#[bench]
fn bench_fibonacci_baseline(b: &mut test::Bencher) {
    // Naive doubly-recursive definition with `fib(0) == fib(1) == 1`.
    fn fib(n: i32) -> i32 {
        match n {
            0 | 1 => 1,
            _ => fib(n - 1) + fib(n - 2),
        }
    }

    // `black_box` on both the input and the result keeps the optimizer from
    // constant-folding the whole computation away.
    b.iter(|| {
        let input = test::black_box(20);
        test::black_box(fib(input))
    });
}