Cranelift AArch64: Implement scalar FmaxPseudo and FminPseudo
Copyright (c) 2021, Arm Limited.
This commit is contained in:
@@ -2926,42 +2926,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::FminPseudo | Opcode::FmaxPseudo => {
|
Opcode::FminPseudo | Opcode::FmaxPseudo => {
|
||||||
let ty = ctx.input_ty(insn, 0);
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
if ty == F32X4 || ty == F64X2 {
|
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
let (ra, rb) = if op == Opcode::FminPseudo {
|
||||||
|
(rm, rn)
|
||||||
|
} else {
|
||||||
|
(rn, rm)
|
||||||
|
};
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let lane_type = ty.lane_type();
|
||||||
|
|
||||||
|
debug_assert!(lane_type == F32 || lane_type == F64);
|
||||||
|
|
||||||
|
if ty.is_vector() {
|
||||||
|
let size = VectorSize::from_ty(ty);
|
||||||
|
|
||||||
// pmin(a,b) => bitsel(b, a, cmpgt(a, b))
|
// pmin(a,b) => bitsel(b, a, cmpgt(a, b))
|
||||||
// pmax(a,b) => bitsel(b, a, cmpgt(b, a))
|
// pmax(a,b) => bitsel(b, a, cmpgt(b, a))
|
||||||
let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
// Since we're going to write the output register `rd` anyway, we might as well
|
||||||
let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
// first use it to hold the comparison result. This has the slightly unusual
|
||||||
let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
// Since we're going to write the output register `r_dst` anyway, we might as
|
|
||||||
// well first use it to hold the comparison result. This has the slightly unusual
|
|
||||||
// effect that we modify the output register in the first instruction (`fcmgt`)
|
// effect that we modify the output register in the first instruction (`fcmgt`)
|
||||||
// but read both the inputs again in the second instruction (`bsl`), which means
|
// but read both the inputs again in the second instruction (`bsl`), which means
|
||||||
// that the output register can't be either of the input registers. Regalloc
|
// that the output register can't be either of the input registers. Regalloc
|
||||||
// should handle this correctly, nevertheless.
|
// should handle this correctly, nevertheless.
|
||||||
ctx.emit(Inst::VecRRR {
|
ctx.emit(Inst::VecRRR {
|
||||||
alu_op: VecALUOp::Fcmgt,
|
alu_op: VecALUOp::Fcmgt,
|
||||||
rd: r_dst,
|
rd,
|
||||||
rn: if op == Opcode::FminPseudo { r_a } else { r_b },
|
rn: ra,
|
||||||
rm: if op == Opcode::FminPseudo { r_b } else { r_a },
|
rm: rb,
|
||||||
size: if ty == F32X4 {
|
size,
|
||||||
VectorSize::Size32x4
|
|
||||||
} else {
|
|
||||||
VectorSize::Size64x2
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
ctx.emit(Inst::VecRRR {
|
ctx.emit(Inst::VecRRR {
|
||||||
alu_op: VecALUOp::Bsl,
|
alu_op: VecALUOp::Bsl,
|
||||||
rd: r_dst,
|
rd,
|
||||||
rn: r_b,
|
rn,
|
||||||
rm: r_a,
|
rm,
|
||||||
size: VectorSize::Size8x16,
|
size,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
return Err(CodegenError::Unsupported(format!(
|
if lane_type == F32 {
|
||||||
"{}: Unsupported type: {:?}",
|
ctx.emit(Inst::FpuCmp32 { rn: ra, rm: rb });
|
||||||
op, ty
|
ctx.emit(Inst::FpuCSel32 {
|
||||||
)));
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
cond: Cond::Gt,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
ctx.emit(Inst::FpuCmp64 { rn: ra, rm: rb });
|
||||||
|
ctx.emit(Inst::FpuCSel64 {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
cond: Cond::Gt,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
test run
|
test run
|
||||||
; target s390x TODO: Not yet implemented on s390x
|
; target s390x TODO: Not yet implemented on s390x
|
||||||
; target aarch64 TODO: Not yet implemented on aarch64
|
target aarch64
|
||||||
set enable_simd
|
set enable_simd
|
||||||
target x86_64 machinst skylake
|
target x86_64 machinst skylake
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user