These instructions have fast, inline JIT paths for the common cases, and only
call out to host VM functions for the slow paths. This required some changes to
`cranelift-wasm`'s `FuncEnvironment`: instead of taking a `FuncCursor` to insert
an instruction sequence within the current basic block,
`FuncEnvironment::translate_table_{get,set}` now take a `&mut FunctionBuilder`
so that they can create whole new basic blocks. This is necessary for
implementing GC read/write barriers that involve branching (e.g. checking for
null, or whether a store buffer is at capacity).
Furthermore, it required that the `load`, `load_complex`, and `store`
instructions handle loading and storing through `r{32,64}` addresses rather
than just `i{32,64}` addresses. This involved making the `r{32,64}` types
acceptable instantiations of the `iAddr` type variable, plus adding a few new
instruction encodings.
Part of #929
428 lines
14 KiB
Rust
428 lines
14 KiB
Rust
//! A post-legalization rewriting pass.
|
|
|
|
#![allow(non_snake_case)]
|
|
|
|
use crate::cursor::{Cursor, EncCursor};
|
|
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
|
|
use crate::ir::dfg::ValueDef;
|
|
use crate::ir::immediates::{Imm64, Offset32};
|
|
use crate::ir::instructions::{Opcode, ValueList};
|
|
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
|
|
use crate::isa::TargetIsa;
|
|
use crate::timing;
|
|
|
|
/// Information collected about a compare+branch sequence.
|
|
struct CmpBrInfo {
|
|
/// The branch instruction.
|
|
br_inst: Inst,
|
|
/// The icmp, icmp_imm, or fcmp instruction.
|
|
cmp_inst: Inst,
|
|
/// The destination of the branch.
|
|
destination: Block,
|
|
/// The arguments of the branch.
|
|
args: ValueList,
|
|
/// The first argument to the comparison. The second is in the `kind` field.
|
|
cmp_arg: Value,
|
|
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
|
|
/// before the branch.
|
|
invert_branch_cond: bool,
|
|
/// The kind of comparison, and the second argument.
|
|
kind: CmpBrKind,
|
|
}
|
|
|
|
enum CmpBrKind {
|
|
Icmp { cond: IntCC, arg: Value },
|
|
IcmpImm { cond: IntCC, imm: Imm64 },
|
|
Fcmp { cond: FloatCC, arg: Value },
|
|
}
|
|
|
|
/// Optimize comparisons to use flags values, to avoid materializing conditions
|
|
/// in integer registers.
|
|
///
|
|
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
|
|
/// sequences.
|
|
fn optimize_cpu_flags(
|
|
pos: &mut EncCursor,
|
|
inst: Inst,
|
|
last_flags_clobber: Option<Inst>,
|
|
isa: &dyn TargetIsa,
|
|
) {
|
|
// Look for compare and branch patterns.
|
|
// This code could be considerably simplified with non-lexical lifetimes.
|
|
let info = match pos.func.dfg[inst] {
|
|
InstructionData::Branch {
|
|
opcode,
|
|
destination,
|
|
ref args,
|
|
} => {
|
|
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
|
|
let invert_branch_cond = match opcode {
|
|
Opcode::Brz => true,
|
|
Opcode::Brnz => false,
|
|
_ => panic!(),
|
|
};
|
|
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
|
|
match pos.func.dfg[cond_inst] {
|
|
InstructionData::IntCompare {
|
|
cond,
|
|
args: cmp_args,
|
|
..
|
|
} => CmpBrInfo {
|
|
br_inst: inst,
|
|
cmp_inst: cond_inst,
|
|
destination,
|
|
args: args.clone(),
|
|
cmp_arg: cmp_args[0],
|
|
invert_branch_cond,
|
|
kind: CmpBrKind::Icmp {
|
|
cond,
|
|
arg: cmp_args[1],
|
|
},
|
|
},
|
|
InstructionData::IntCompareImm {
|
|
cond,
|
|
arg: cmp_arg,
|
|
imm: cmp_imm,
|
|
..
|
|
} => CmpBrInfo {
|
|
br_inst: inst,
|
|
cmp_inst: cond_inst,
|
|
destination,
|
|
args: args.clone(),
|
|
cmp_arg,
|
|
invert_branch_cond,
|
|
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
|
|
},
|
|
InstructionData::FloatCompare {
|
|
cond,
|
|
args: cmp_args,
|
|
..
|
|
} => CmpBrInfo {
|
|
br_inst: inst,
|
|
cmp_inst: cond_inst,
|
|
destination,
|
|
args: args.clone(),
|
|
cmp_arg: cmp_args[0],
|
|
invert_branch_cond,
|
|
kind: CmpBrKind::Fcmp {
|
|
cond,
|
|
arg: cmp_args[1],
|
|
},
|
|
},
|
|
_ => return,
|
|
}
|
|
} else {
|
|
return;
|
|
}
|
|
}
|
|
// TODO: trapif, trueif, selectif, and their ff counterparts.
|
|
_ => return,
|
|
};
|
|
|
|
// If any instructions clobber the flags between the comparison and the branch,
|
|
// don't optimize them.
|
|
if last_flags_clobber != Some(info.cmp_inst) {
|
|
return;
|
|
}
|
|
|
|
// We found a compare+branch pattern. Transform it to use flags.
|
|
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
|
|
pos.goto_inst(info.cmp_inst);
|
|
pos.use_srcloc(info.cmp_inst);
|
|
match info.kind {
|
|
CmpBrKind::Icmp { mut cond, arg } => {
|
|
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
|
|
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
|
if info.invert_branch_cond {
|
|
cond = cond.inverse();
|
|
}
|
|
pos.func
|
|
.dfg
|
|
.replace(info.br_inst)
|
|
.brif(cond, flags, info.destination, &args);
|
|
}
|
|
CmpBrKind::IcmpImm { mut cond, imm } => {
|
|
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
|
|
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
|
if info.invert_branch_cond {
|
|
cond = cond.inverse();
|
|
}
|
|
pos.func
|
|
.dfg
|
|
.replace(info.br_inst)
|
|
.brif(cond, flags, info.destination, &args);
|
|
}
|
|
CmpBrKind::Fcmp { mut cond, arg } => {
|
|
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
|
|
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
|
|
if info.invert_branch_cond {
|
|
cond = cond.inverse();
|
|
}
|
|
pos.func
|
|
.dfg
|
|
.replace(info.br_inst)
|
|
.brff(cond, flags, info.destination, &args);
|
|
}
|
|
}
|
|
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
|
|
debug_assert!(ok);
|
|
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
|
|
debug_assert!(ok);
|
|
}
|
|
|
|
struct MemOpInfo {
|
|
opcode: Opcode,
|
|
itype: Type,
|
|
arg: Value,
|
|
st_arg: Option<Value>,
|
|
flags: MemFlags,
|
|
offset: Offset32,
|
|
}
|
|
|
|
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
|
|
// Look for simple loads and stores we can optimize.
|
|
let info = match pos.func.dfg[inst] {
|
|
InstructionData::Load {
|
|
opcode,
|
|
arg,
|
|
flags,
|
|
offset,
|
|
} => MemOpInfo {
|
|
opcode,
|
|
itype: pos.func.dfg.ctrl_typevar(inst),
|
|
arg,
|
|
st_arg: None,
|
|
flags,
|
|
offset,
|
|
},
|
|
InstructionData::Store {
|
|
opcode,
|
|
args,
|
|
flags,
|
|
offset,
|
|
} => MemOpInfo {
|
|
opcode,
|
|
itype: pos.func.dfg.ctrl_typevar(inst),
|
|
arg: args[1],
|
|
st_arg: Some(args[0]),
|
|
flags,
|
|
offset,
|
|
},
|
|
_ => return,
|
|
};
|
|
|
|
// Examine the instruction that defines the address operand.
|
|
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
|
|
match pos.func.dfg[result_inst] {
|
|
InstructionData::Binary {
|
|
opcode: Opcode::Iadd,
|
|
args,
|
|
} => match info.opcode {
|
|
// Operand is an iadd. Fold it into a memory address with a complex address mode.
|
|
Opcode::Load => {
|
|
pos.func.dfg.replace(inst).load_complex(
|
|
info.itype,
|
|
info.flags,
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Uload8 => {
|
|
pos.func.dfg.replace(inst).uload8_complex(
|
|
info.itype,
|
|
info.flags,
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Sload8 => {
|
|
pos.func.dfg.replace(inst).sload8_complex(
|
|
info.itype,
|
|
info.flags,
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Uload16 => {
|
|
pos.func.dfg.replace(inst).uload16_complex(
|
|
info.itype,
|
|
info.flags,
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Sload16 => {
|
|
pos.func.dfg.replace(inst).sload16_complex(
|
|
info.itype,
|
|
info.flags,
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Uload32 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.uload32_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Sload32 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.sload32_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Uload8x8 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.uload8x8_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Sload8x8 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.sload8x8_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Uload16x4 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.uload16x4_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Sload16x4 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.sload16x4_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Uload32x2 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.uload32x2_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Sload32x2 => {
|
|
pos.func
|
|
.dfg
|
|
.replace(inst)
|
|
.sload32x2_complex(info.flags, &args, info.offset);
|
|
}
|
|
Opcode::Store => {
|
|
pos.func.dfg.replace(inst).store_complex(
|
|
info.flags,
|
|
info.st_arg.unwrap(),
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Istore8 => {
|
|
pos.func.dfg.replace(inst).istore8_complex(
|
|
info.flags,
|
|
info.st_arg.unwrap(),
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Istore16 => {
|
|
pos.func.dfg.replace(inst).istore16_complex(
|
|
info.flags,
|
|
info.st_arg.unwrap(),
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
Opcode::Istore32 => {
|
|
pos.func.dfg.replace(inst).istore32_complex(
|
|
info.flags,
|
|
info.st_arg.unwrap(),
|
|
&args,
|
|
info.offset,
|
|
);
|
|
}
|
|
_ => panic!("Unsupported load or store opcode"),
|
|
},
|
|
InstructionData::BinaryImm64 {
|
|
opcode: Opcode::IaddImm,
|
|
arg,
|
|
imm,
|
|
} => match pos.func.dfg[inst] {
|
|
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
|
|
InstructionData::Load {
|
|
arg: ref mut load_arg,
|
|
ref mut offset,
|
|
..
|
|
} => {
|
|
if let Some(imm) = offset.try_add_i64(imm.into()) {
|
|
*load_arg = arg;
|
|
*offset = imm;
|
|
} else {
|
|
// Overflow.
|
|
return;
|
|
}
|
|
}
|
|
InstructionData::Store {
|
|
args: ref mut store_args,
|
|
ref mut offset,
|
|
..
|
|
} => {
|
|
if let Some(imm) = offset.try_add_i64(imm.into()) {
|
|
store_args[1] = arg;
|
|
*offset = imm;
|
|
} else {
|
|
// Overflow.
|
|
return;
|
|
}
|
|
}
|
|
_ => panic!(),
|
|
},
|
|
_ => {
|
|
// Address value is defined by some other kind of instruction.
|
|
return;
|
|
}
|
|
}
|
|
} else {
|
|
// Address value is not the result of an instruction.
|
|
return;
|
|
}
|
|
|
|
let ok = pos.func.update_encoding(inst, isa).is_ok();
|
|
debug_assert!(
|
|
ok,
|
|
"failed to update encoding for `{}`",
|
|
pos.func.dfg.display_inst(inst, isa)
|
|
);
|
|
}
|
|
|
|
//----------------------------------------------------------------------
|
|
//
|
|
// The main post-opt pass.
|
|
|
|
pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
|
|
let _tt = timing::postopt();
|
|
let mut pos = EncCursor::new(func, isa);
|
|
let is_mach_backend = isa.get_mach_backend().is_some();
|
|
while let Some(_block) = pos.next_block() {
|
|
let mut last_flags_clobber = None;
|
|
while let Some(inst) = pos.next_inst() {
|
|
if !is_mach_backend && isa.uses_cpu_flags() {
|
|
// Optimize instructions to make use of flags.
|
|
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
|
|
|
|
// Track the most recent seen instruction that clobbers the flags.
|
|
if let Some(constraints) = isa
|
|
.encoding_info()
|
|
.operand_constraints(pos.func.encodings[inst])
|
|
{
|
|
if constraints.clobbers_flags {
|
|
last_flags_clobber = Some(inst)
|
|
}
|
|
}
|
|
}
|
|
|
|
if isa.uses_complex_addresses() {
|
|
optimize_complex_addresses(&mut pos, inst, isa);
|
|
}
|
|
}
|
|
}
|
|
}
|