Improve bitselect codegen with knowledge of operand origin (#1783)
* Encode vselect using BLEND instructions on x86 * Legalize vselect to bitselect * Optimize bitselect to vselect for some operands * Add run tests for bitselect-vselect optimization * Address review feedback
This commit is contained in:
@@ -656,7 +656,7 @@ mod simplify {
|
||||
dfg::ValueDef,
|
||||
immediates,
|
||||
instructions::{Opcode, ValueList},
|
||||
types::{I16, I32, I8},
|
||||
types::{B8, I16, I32, I8},
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
@@ -935,6 +935,69 @@ mod simplify {
|
||||
}
|
||||
}
|
||||
|
||||
InstructionData::Ternary {
|
||||
opcode: Opcode::Bitselect,
|
||||
args,
|
||||
} => {
|
||||
let old_cond_type = pos.func.dfg.value_type(args[0]);
|
||||
if !old_cond_type.is_vector() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Replace bitselect with vselect if each lane of controlling mask is either
|
||||
// all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
|
||||
// while vselect can be encoded using single BLEND instruction.
|
||||
if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
|
||||
let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
|
||||
InstructionData::Unary {
|
||||
opcode: Opcode::RawBitcast,
|
||||
arg,
|
||||
} => {
|
||||
// If controlling mask is raw-bitcasted boolean vector then
|
||||
// we know each lane is either all zeroes or ones,
|
||||
// so we can use vselect instruction instead.
|
||||
let arg_type = pos.func.dfg.value_type(arg);
|
||||
if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
|
||||
return;
|
||||
}
|
||||
(arg, arg_type)
|
||||
}
|
||||
InstructionData::UnaryConst {
|
||||
opcode: Opcode::Vconst,
|
||||
constant_handle,
|
||||
} => {
|
||||
// If each byte of controlling mask is 0x00 or 0xFF then
|
||||
// we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
|
||||
// Bitselect operates at bit level, so the lane types don't matter.
|
||||
let const_data = pos.func.dfg.constants.get(constant_handle);
|
||||
if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
|
||||
return;
|
||||
}
|
||||
let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
|
||||
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
|
||||
let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
|
||||
let old_arg_type = pos.func.dfg.value_type(args[1]);
|
||||
|
||||
if arg_type != old_arg_type {
|
||||
// Operands types must match, we need to add bitcasts.
|
||||
let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
|
||||
let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
|
||||
let ret = pos.ins().vselect(cond_val, arg1, arg2);
|
||||
pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
|
||||
} else {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.vselect(cond_val, args[1], args[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user