Improve bitselect codegen with knowledge of operand origin (#1783)

* Encode vselect using BLEND instructions on x86

* Legalize vselect to bitselect

* Optimize bitselect to vselect for some operands

* Add run tests for bitselect-vselect optimization

* Address review feedback
This commit is contained in:
teapotd
2020-05-30 04:53:11 +02:00
committed by GitHub
parent 16afca4451
commit e430984ac4
11 changed files with 341 additions and 1 deletions

View File

@@ -378,6 +378,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true");
let vselect = insts.by_name("vselect");
let x86_packss = x86_instructions.by_name("x86_packss");
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
@@ -589,6 +590,17 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
);
}
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
// This works, because each lane of boolean vector is filled with zeroes or ones.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vselect = vselect.bind(vector(ty, sse_vector_size));
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(d = vselect(c, x, y)),
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
);
}
// SIMD vany_true
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {