wasmtime/cranelift/codegen/src/postopt.rs
Commit 8c5f59c0cf by Nick Fitzgerald: "wasmtime: Implement table.get and table.set"
These instructions have fast, inline JIT paths for the common cases, and only
call out to host VM functions for the slow paths. This required some changes to
`cranelift-wasm`'s `FuncEnvironment`: instead of taking a `FuncCursor` to insert
an instruction sequence within the current basic block,
`FuncEnvironment::translate_table_{get,set}` now take a `&mut FunctionBuilder`
so that they can create whole new basic blocks. This is necessary for
implementing GC read/write barriers that involve branching (e.g. checking for
null, or whether a store buffer is at capacity).
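
For illustration, here is a minimal sketch of why a `&mut FunctionBuilder` is
needed; the function shape and names below are hypothetical, not the exact
trait definition:

    // Hypothetical sketch: a read barrier that needs its own basic blocks.
    fn translate_table_get(builder: &mut FunctionBuilder, elem: ir::Value) -> ir::Value {
        let slow_path = builder.create_block();
        let fast_path = builder.create_block();
        let is_null = builder.ins().is_null(elem);
        builder.ins().brnz(is_null, slow_path, &[]);
        builder.ins().jump(fast_path, &[]);
        builder.switch_to_block(fast_path);
        // ...the fast path continues in the new block; a `FuncCursor` could
        // only have appended instructions to the current block.
        elem
    }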

Furthermore, it required that the `load`, `load_complex`, and `store`
instructions handle loading and storing through `r{32,64}` addresses rather
than just `i{32,64}` addresses. This involved making the `r{32,64}` types
acceptable instantiations of the `iAddr` type variable, plus a few new
instruction encodings.
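
As a small example, a load whose address operand has a reference type is now
accepted (CLIF sketch, hypothetical values):

    v1 = load.i32 v0+8    ; v0 has type r64; previously only i32/i64 addresses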

Part of #929
2020-06-30 12:00:57 -07:00

//! A post-legalization rewriting pass.

#![allow(non_snake_case)]

use crate::cursor::{Cursor, EncCursor};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::dfg::ValueDef;
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::instructions::{Opcode, ValueList};
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
use crate::isa::TargetIsa;
use crate::timing;

/// Information collected about a compare+branch sequence.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Block,
/// The arguments of the branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}

enum CmpBrKind {
Icmp { cond: IntCC, arg: Value },
IcmpImm { cond: IntCC, imm: Imm64 },
Fcmp { cond: FloatCC, arg: Value },
}

/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
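///
/// A sketch of the rewrite (CLIF notation, hypothetical values):
///
/// ```text
/// v2 = icmp sgt v0, v1      ; materializes a boolean in a register
/// brnz v2, block2(v3)
/// ```
///
/// becomes
///
/// ```text
/// v4 = ifcmp v0, v1         ; compares straight into the CPU flags
/// v2 = trueif sgt v4        ; kept for any remaining uses of v2
/// brif sgt v4, block2(v3)
/// ```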
fn optimize_cpu_flags(
pos: &mut EncCursor,
inst: Inst,
last_flags_clobber: Option<Inst>,
isa: &dyn TargetIsa,
) {
// Look for compare and branch patterns.
// This code could be considerably simplified with non-lexical lifetimes.
let info = match pos.func.dfg[inst] {
InstructionData::Branch {
opcode,
destination,
ref args,
} => {
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
let invert_branch_cond = match opcode {
Opcode::Brz => true,
Opcode::Brnz => false,
                _ => panic!("unexpected branch opcode: {:?}", opcode),
};
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
match pos.func.dfg[cond_inst] {
InstructionData::IntCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Icmp {
cond,
arg: cmp_args[1],
},
},
InstructionData::IntCompareImm {
cond,
arg: cmp_arg,
imm: cmp_imm,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg,
invert_branch_cond,
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
},
InstructionData::FloatCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Fcmp {
cond,
arg: cmp_args[1],
},
},
_ => return,
}
} else {
return;
}
}
// TODO: trapif, trueif, selectif, and their ff counterparts.
_ => return,
};
    // Only optimize when the comparison is itself the most recent
    // flags-clobbering instruction, i.e. nothing clobbers the flags between
    // the comparison and the branch.
    if last_flags_clobber != Some(info.cmp_inst) {
        return;
    }
// We found a compare+branch pattern. Transform it to use flags.
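    // Skip the branch's first argument: it is the tested value, and the
    // flag-based branches take their condition from `flags` instead.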
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
pos.goto_inst(info.cmp_inst);
pos.use_srcloc(info.cmp_inst);
match info.kind {
CmpBrKind::Icmp { mut cond, arg } => {
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
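            // `replace` reuses the icmp's result value, so rewriting it to
            // `trueif` preserves any other uses of the boolean.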
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::IcmpImm { mut cond, imm } => {
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::Fcmp { mut cond, arg } => {
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brff(cond, flags, info.destination, &args);
}
}
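    // The rewritten instructions need fresh encodings for the target ISA.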
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
debug_assert!(ok);
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
debug_assert!(ok);
}

/// Information about a load or store whose address may be optimizable.
struct MemOpInfo {
    /// The load or store opcode.
    opcode: Opcode,
    /// The controlling type of the access.
    itype: Type,
    /// The address operand.
    arg: Value,
    /// The stored value, for stores only.
    st_arg: Option<Value>,
    /// The memory flags of the access.
    flags: MemFlags,
    /// The immediate offset added to the address.
    offset: Offset32,
}

/// Fold address computations (`iadd` or `iadd_imm`) into loads and stores,
/// either as a complex address mode or as a larger immediate offset.
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
// Look for simple loads and stores we can optimize.
let info = match pos.func.dfg[inst] {
InstructionData::Load {
opcode,
arg,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg,
st_arg: None,
flags,
offset,
},
InstructionData::Store {
opcode,
args,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
            arg: args[1],          // the address operand
            st_arg: Some(args[0]), // the value being stored
flags,
offset,
},
_ => return,
};
// Examine the instruction that defines the address operand.
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
match pos.func.dfg[result_inst] {
InstructionData::Binary {
opcode: Opcode::Iadd,
args,
} => match info.opcode {
// Operand is an iadd. Fold it into a memory address with a complex address mode.
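                // For example (CLIF sketch, hypothetical values):
                //   v2 = iadd v0, v1
                //   v3 = load.i64 v2+16
                // becomes
                //   v3 = load_complex.i64 v0+v1+16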
Opcode::Load => {
pos.func.dfg.replace(inst).load_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload8 => {
pos.func.dfg.replace(inst).uload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload8 => {
pos.func.dfg.replace(inst).sload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload16 => {
pos.func.dfg.replace(inst).uload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload16 => {
pos.func.dfg.replace(inst).sload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload32 => {
pos.func
.dfg
.replace(inst)
.uload32_complex(info.flags, &args, info.offset);
}
Opcode::Sload32 => {
pos.func
.dfg
.replace(inst)
.sload32_complex(info.flags, &args, info.offset);
}
Opcode::Uload8x8 => {
pos.func
.dfg
.replace(inst)
.uload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Sload8x8 => {
pos.func
.dfg
.replace(inst)
.sload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Uload16x4 => {
pos.func
.dfg
.replace(inst)
.uload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Sload16x4 => {
pos.func
.dfg
.replace(inst)
.sload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Uload32x2 => {
pos.func
.dfg
.replace(inst)
.uload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Sload32x2 => {
pos.func
.dfg
.replace(inst)
.sload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Store => {
pos.func.dfg.replace(inst).store_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore8 => {
pos.func.dfg.replace(inst).istore8_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore16 => {
pos.func.dfg.replace(inst).istore16_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore32 => {
pos.func.dfg.replace(inst).istore32_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
_ => panic!("Unsupported load or store opcode"),
},
InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg,
imm,
} => match pos.func.dfg[inst] {
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
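                // For example (CLIF sketch, hypothetical values):
                //   v1 = iadd_imm v0, 32
                //   v2 = load.i64 v1+8
                // becomes, when the immediates sum without overflow:
                //   v2 = load.i64 v0+40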
InstructionData::Load {
arg: ref mut load_arg,
ref mut offset,
..
} => {
                if let Some(new_offset) = offset.try_add_i64(imm.into()) {
                    *load_arg = arg;
                    *offset = new_offset;
                } else {
                    // Offset addition overflowed; leave the instruction as-is.
                    return;
                }
}
InstructionData::Store {
args: ref mut store_args,
ref mut offset,
..
} => {
                if let Some(new_offset) = offset.try_add_i64(imm.into()) {
                    store_args[1] = arg;
                    *offset = new_offset;
                } else {
                    // Offset addition overflowed; leave the instruction as-is.
                    return;
                }
}
            _ => panic!("inst was matched as a load or store above"),
},
_ => {
// Address value is defined by some other kind of instruction.
return;
}
}
} else {
// Address value is not the result of an instruction.
return;
}
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(
ok,
"failed to update encoding for `{}`",
pos.func.dfg.display_inst(inst, isa)
);
}

//----------------------------------------------------------------------
//
// The main post-opt pass.

pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
let _tt = timing::postopt();
let mut pos = EncCursor::new(func, isa);
let is_mach_backend = isa.get_mach_backend().is_some();
while let Some(_block) = pos.next_block() {
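        // Flags clobbers are tracked per block, resetting at each block entry.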
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
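            // The new-style `MachInst` backends handle flags during their own
            // lowering, so these rewrites only apply to old-style backends.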
if !is_mach_backend && isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
// Track the most recent seen instruction that clobbers the flags.
if let Some(constraints) = isa
.encoding_info()
.operand_constraints(pos.func.encodings[inst])
{
if constraints.clobbers_flags {
last_flags_clobber = Some(inst)
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}