Avoid extra register movement when lowering an x86 insertlane to a float vector
This commit is contained in:
@@ -950,3 +950,65 @@ fn convert_extractlane(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Because floats exist in XMM registers, we can keep them there when executing a CLIF
|
||||
/// insertlane instruction
|
||||
fn convert_insertlane(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
if let ir::InstructionData::InsertLane {
|
||||
opcode: ir::Opcode::Insertlane,
|
||||
args: [vector, replacement],
|
||||
lane,
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
let value_type = pos.func.dfg.value_type(vector);
|
||||
if value_type.lane_type().is_float() {
|
||||
// Floats are already in XMM registers and can stay there.
|
||||
match value_type {
|
||||
F32X4 => {
|
||||
assert!(lane > 0 && lane <= 3);
|
||||
let immediate = 0b00_00_00_00 | lane << 4;
|
||||
// Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
|
||||
// shifted into bits 5:6).
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_insertps(vector, immediate, replacement)
|
||||
}
|
||||
F64X2 => {
|
||||
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
|
||||
if lane == 0 {
|
||||
// Move the lowest quadword in replacement to vector without changing
|
||||
// the upper bits.
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_movsd(vector, replacement_as_vector)
|
||||
} else {
|
||||
assert_eq!(lane, 1);
|
||||
// Move the low 64 bits of replacement vector to the high 64 bits of the
|
||||
// vector.
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_movlhps(vector, replacement_as_vector)
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
} else {
|
||||
// For non-floats, lower with the usual PINSR* instruction.
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.x86_pinsr(vector, lane, replacement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user