diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index f3cd42f12e..c6703b23aa 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -137,7 +137,7 @@ pub(crate) fn emit( // Certain instructions may be present in more than one ISA feature set; we must at least match // one of them in the target CPU. let isa_requirements = inst.available_in_any_isa(); - if !isa_requirements.is_empty() && !isa_requirements.iter().any(matches_isa_flags) { + if !isa_requirements.is_empty() && !isa_requirements.iter().all(matches_isa_flags) { panic!( "Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}", inst, isa_requirements diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 11acc3107e..9951842d2b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4324,10 +4324,11 @@ fn test_x64_emit() { let mut isa_flag_builder = x64::settings::builder(); isa_flag_builder.enable("has_ssse3").unwrap(); isa_flag_builder.enable("has_sse41").unwrap(); - isa_flag_builder.enable("has_avx512f").unwrap(); + isa_flag_builder.enable("has_avx512bitalg").unwrap(); isa_flag_builder.enable("has_avx512dq").unwrap(); - isa_flag_builder.enable("has_avx512vl").unwrap(); + isa_flag_builder.enable("has_avx512f").unwrap(); isa_flag_builder.enable("has_avx512vbmi").unwrap(); + isa_flag_builder.enable("has_avx512vl").unwrap(); let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder); let rru = regs::create_reg_universe_systemv(&flags); diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 5e6b4670ab..d60b83345f 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1668,8 +1668,8 @@ fn lower_insn_to_regs>( let rhs = put_input_in_reg(ctx, inputs[1]); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() { - // With the right AVX512 features (VL, DQ) this operation + if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512dq_simd() { + // With the right AVX512 features (VL + DQ) this operation // can lower to a single operation. ctx.emit(Inst::xmm_rm_r_evex( Avx512Opcode::Vpmullq, @@ -1905,7 +1905,7 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); if ty == types::I64X2 { - if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() { + if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() { ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst)); } else { // If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to @@ -2426,7 +2426,7 @@ fn lower_insn_to_regs>( )); } else if dst_ty == types::I64X2 && op == Opcode::Sshr { // The `sshr.i8x16` CLIF instruction has no single x86 instruction in the older feature sets; newer ones - // like AVX512VL and AVX512F include VPSRAQ, a 128-bit instruction that would fit here, but this backend + // like AVX512VL + AVX512F include VPSRAQ, a 128-bit instruction that would fit here, but this backend // does not currently have support for EVEX encodings (TODO when EVEX support is available, add an // alternate lowering here). To remedy this, we extract each 64-bit lane to a GPR, shift each using a // scalar instruction, and insert the shifted values back in the `dst` XMM register. @@ -3084,8 +3084,8 @@ fn lower_insn_to_regs>( let src = put_input_in_reg(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if isa_flags.use_avx512vl_simd() || isa_flags.use_avx512bitalg_simd() { - // When either AVX512VL or AVX512BITALG are available, + if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512bitalg_simd() { + // When AVX512VL and AVX512BITALG are available, // `popcnt.i8x16` can be lowered to a single instruction. assert_eq!(ty, types::I8X16); ctx.emit(Inst::xmm_unary_rm_r_evex( @@ -4163,8 +4163,8 @@ fn lower_insn_to_regs>( let src = put_input_in_reg(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() { - // When either AVX512VL or AVX512F are available, + if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() { + // When AVX512VL and AVX512F are available, // `fcvt_from_uint` can be lowered to a single instruction. ctx.emit(Inst::xmm_unary_rm_r_evex( Avx512Opcode::Vcvtudq2ps,