Avoid extra register movement when lowering an x86 insertlane to a float vector

This commit is contained in:
Andrew Brown
2019-08-23 11:38:29 -07:00
parent 3dfc68afb1
commit 295b2ef614
11 changed files with 334 additions and 55 deletions

View File

@@ -950,3 +950,65 @@ fn convert_extractlane(
}
}
}
/// Legalize a CLIF `insertlane` instruction into an x86-specific instruction.
///
/// Because floats exist in XMM registers, a float replacement value can stay there: float
/// vectors are lowered to `x86_insertps` (F32X4) or `x86_movsd`/`x86_movlhps` (F64X2). All other
/// vector types are lowered with the usual `x86_pinsr`.
///
/// `inst` is replaced in place; if `inst` is not an `insertlane`, the function does nothing.
///
/// # Panics
///
/// Panics if the lane index is out of range for a float vector type, or if a float vector type
/// other than F32X4 or F64X2 is encountered.
fn convert_insertlane(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    _isa: &dyn TargetIsa,
) {
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    if let ir::InstructionData::InsertLane {
        opcode: ir::Opcode::Insertlane,
        args: [vector, replacement],
        lane,
    } = pos.func.dfg[inst]
    {
        let value_type = pos.func.dfg.value_type(vector);
        if value_type.lane_type().is_float() {
            // Floats are already in XMM registers and can stay there.
            match value_type {
                F32X4 => {
                    // Every lane of a four-lane vector is a valid destination, including lane
                    // 0 (which yields an all-zero immediate).
                    assert!(lane <= 3, "f32x4 lane index out of range");
                    // INSERTPS immediate layout: bits 7:6 select the source element, bits 5:4
                    // select the destination element and bits 3:0 are the zero mask. The
                    // replacement is taken from source element 0 and nothing is zeroed, so only
                    // the destination field (the lane shifted into bits 5:4) is set.
                    let immediate = lane << 4;
                    pos.func
                        .dfg
                        .replace(inst)
                        .x86_insertps(vector, immediate, replacement)
                }
                F64X2 => {
                    // The bitcast is only necessary due to SSA types: the instructions below
                    // take the replacement as a vector operand. It is inserted before `inst`.
                    let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement);
                    if lane == 0 {
                        // Move the lowest quadword in replacement to vector without changing
                        // the upper bits.
                        pos.func
                            .dfg
                            .replace(inst)
                            .x86_movsd(vector, replacement_as_vector)
                    } else {
                        assert_eq!(lane, 1);
                        // Move the low 64 bits of replacement vector to the high 64 bits of the
                        // vector.
                        pos.func
                            .dfg
                            .replace(inst)
                            .x86_movlhps(vector, replacement_as_vector)
                    }
                }
                _ => unreachable!(),
            };
        } else {
            // For non-floats, lower with the usual PINSR* instruction.
            pos.func
                .dfg
                .replace(inst)
                .x86_pinsr(vector, lane, replacement);
        }
    }
}