Merge pull request #2958 from abrown/fix-avx512-flags
x64: fix AVX512 flag checks
This commit is contained in:
@@ -137,7 +137,7 @@ pub(crate) fn emit(
|
|||||||
// Certain instructions may be present in more than one ISA feature set; we must at least match
|
// Certain instructions may be present in more than one ISA feature set; we must at least match
|
||||||
// one of them in the target CPU.
|
// one of them in the target CPU.
|
||||||
let isa_requirements = inst.available_in_any_isa();
|
let isa_requirements = inst.available_in_any_isa();
|
||||||
if !isa_requirements.is_empty() && !isa_requirements.iter().any(matches_isa_flags) {
|
if !isa_requirements.is_empty() && !isa_requirements.iter().all(matches_isa_flags) {
|
||||||
panic!(
|
panic!(
|
||||||
"Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}",
|
"Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}",
|
||||||
inst, isa_requirements
|
inst, isa_requirements
|
||||||
|
|||||||
@@ -4324,10 +4324,11 @@ fn test_x64_emit() {
|
|||||||
let mut isa_flag_builder = x64::settings::builder();
|
let mut isa_flag_builder = x64::settings::builder();
|
||||||
isa_flag_builder.enable("has_ssse3").unwrap();
|
isa_flag_builder.enable("has_ssse3").unwrap();
|
||||||
isa_flag_builder.enable("has_sse41").unwrap();
|
isa_flag_builder.enable("has_sse41").unwrap();
|
||||||
isa_flag_builder.enable("has_avx512f").unwrap();
|
isa_flag_builder.enable("has_avx512bitalg").unwrap();
|
||||||
isa_flag_builder.enable("has_avx512dq").unwrap();
|
isa_flag_builder.enable("has_avx512dq").unwrap();
|
||||||
isa_flag_builder.enable("has_avx512vl").unwrap();
|
isa_flag_builder.enable("has_avx512f").unwrap();
|
||||||
isa_flag_builder.enable("has_avx512vbmi").unwrap();
|
isa_flag_builder.enable("has_avx512vbmi").unwrap();
|
||||||
|
isa_flag_builder.enable("has_avx512vl").unwrap();
|
||||||
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
|
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
|
||||||
|
|
||||||
let rru = regs::create_reg_universe_systemv(&flags);
|
let rru = regs::create_reg_universe_systemv(&flags);
|
||||||
|
|||||||
@@ -1668,8 +1668,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() {
|
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512dq_simd() {
|
||||||
// With the right AVX512 features (VL, DQ) this operation
|
// With the right AVX512 features (VL + DQ) this operation
|
||||||
// can lower to a single operation.
|
// can lower to a single operation.
|
||||||
ctx.emit(Inst::xmm_rm_r_evex(
|
ctx.emit(Inst::xmm_rm_r_evex(
|
||||||
Avx512Opcode::Vpmullq,
|
Avx512Opcode::Vpmullq,
|
||||||
@@ -1905,7 +1905,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if ty == types::I64X2 {
|
if ty == types::I64X2 {
|
||||||
if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() {
|
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
|
||||||
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
|
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
|
||||||
} else {
|
} else {
|
||||||
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
|
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
|
||||||
@@ -2426,7 +2426,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
));
|
));
|
||||||
} else if dst_ty == types::I64X2 && op == Opcode::Sshr {
|
} else if dst_ty == types::I64X2 && op == Opcode::Sshr {
|
||||||
// The `sshr.i64x2` CLIF instruction has no single x86 instruction in the older feature sets; newer ones
|
// The `sshr.i64x2` CLIF instruction has no single x86 instruction in the older feature sets; newer ones
|
||||||
// like AVX512VL and AVX512F include VPSRAQ, a 128-bit instruction that would fit here, but this backend
|
// like AVX512VL + AVX512F include VPSRAQ, a 128-bit instruction that would fit here, but this backend
|
||||||
// does not currently have support for EVEX encodings (TODO when EVEX support is available, add an
|
// does not currently have support for EVEX encodings (TODO when EVEX support is available, add an
|
||||||
// alternate lowering here). To remedy this, we extract each 64-bit lane to a GPR, shift each using a
|
// alternate lowering here). To remedy this, we extract each 64-bit lane to a GPR, shift each using a
|
||||||
// scalar instruction, and insert the shifted values back in the `dst` XMM register.
|
// scalar instruction, and insert the shifted values back in the `dst` XMM register.
|
||||||
@@ -3084,8 +3084,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let src = put_input_in_reg(ctx, inputs[0]);
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
if isa_flags.use_avx512vl_simd() || isa_flags.use_avx512bitalg_simd() {
|
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512bitalg_simd() {
|
||||||
// When either AVX512VL or AVX512BITALG are available,
|
// When AVX512VL and AVX512BITALG are available,
|
||||||
// `popcnt.i8x16` can be lowered to a single instruction.
|
// `popcnt.i8x16` can be lowered to a single instruction.
|
||||||
assert_eq!(ty, types::I8X16);
|
assert_eq!(ty, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_unary_rm_r_evex(
|
ctx.emit(Inst::xmm_unary_rm_r_evex(
|
||||||
@@ -4163,8 +4163,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let src = put_input_in_reg(ctx, inputs[0]);
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|
||||||
if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() {
|
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
|
||||||
// When either AVX512VL or AVX512F are available,
|
// When AVX512VL and AVX512F are available,
|
||||||
// `fcvt_from_uint` can be lowered to a single instruction.
|
// `fcvt_from_uint` can be lowered to a single instruction.
|
||||||
ctx.emit(Inst::xmm_unary_rm_r_evex(
|
ctx.emit(Inst::xmm_unary_rm_r_evex(
|
||||||
Avx512Opcode::Vcvtudq2ps,
|
Avx512Opcode::Vcvtudq2ps,
|
||||||
|
|||||||
Reference in New Issue
Block a user