diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 8afe4e400b..e226117149 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1655,10 +1655,12 @@ fn define_simd( let x86_ptest = x86.by_name("x86_ptest"); let x86_punpckh = x86.by_name("x86_punpckh"); let x86_punpckl = x86.by_name("x86_punpckl"); + let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); // Shorthands for recipes. let rec_blend = r.template("blend"); let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); + let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); let rec_f_ib = r.template("f_ib"); let rec_fa = r.template("fa"); let rec_fa_ib = r.template("fa_ib"); @@ -1702,6 +1704,7 @@ fn define_simd( let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); + let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); // SIMD vector size: eventually multiple vector sizes may be supported but for now only // SSE-sized vectors are available. 
@@ -1885,6 +1888,12 @@ fn define_simd( .bind(vector(F32, sse_vector_size)) .bind(vector(I32, sse_vector_size)); e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); + + e.enc_32_64_maybe_isap( + x86_vcvtudq2ps, + rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), + Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F + ); } // SIMD vconst for special cases (all zeroes, all ones) diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 53d91ca861..5e9c80e6ad 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -145,6 +145,37 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let f32x4 = &TypeVar::new( + "f32x4", + "A floating point number", + TypeSetBuilder::new() + .floats(32..32) + .simd_lanes(4..4) + .build(), + ); + let i32x4 = &TypeVar::new( + "i32x4", + "An integer type with the same number of lanes", + TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), + ); + let x = &Operand::new("x", i32x4); + let a = &Operand::new("a", f32x4); + + ig.push( + Inst::new( + "x86_vcvtudq2ps", + r#" + Convert unsigned integer to floating point. + + Convert packed doubleword unsigned integers to packed single-precision floating-point + values. This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let x = &Operand::new("x", Float); let a = &Operand::new("a", Float); let y = &Operand::new("y", Float); diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index c1d4fa0ef5..23efc620d2 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -665,6 +665,12 @@ pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; /// Raise invalid opcode instruction. 
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; +/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed +/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior +/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode +/// (AVX512VL, AVX512F). +pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; + /// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. pub static XOR_IMM: [u8; 1] = [0x81]; diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 0cfd83d373..74645d0b59 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -3417,5 +3417,23 @@ pub(crate) fn define<'shared>( regs).rex_kind(RecipePrefixKind::Evex) ); + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), rm (op2, r) + // this maps to: out_reg0, in_reg0 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + recipes } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index bce7276552..2e17cd7b0e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2066,6 +2066,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::X86Packss | Opcode::X86Punpckh | Opcode::X86Punpckl + | Opcode::X86Vcvtudq2ps | Opcode::X86ElfTlsGetAddr | Opcode::X86MachoTlsGetAddr => { panic!("x86-specific opcode in supposedly arch-neutral IR!"); diff --git
a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif new file mode 100644 index 0000000000..37abef0e61 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif @@ -0,0 +1,9 @@ +test binemit +set enable_simd +target x86_64 has_avx512vl=true + +function %fcvt_from_uint(i32x4) { +block0(v0: i32x4 [%xmm2]): +[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2 + return +}