Add x86_vcvtudq2ps instruction
This instruction converts unsigned i32x4 to f32x4 and is available in several AVX512 feature sets (AVX512VL, AVX512F).
This commit is contained in:
@@ -1655,10 +1655,12 @@ fn define_simd(
|
|||||||
let x86_ptest = x86.by_name("x86_ptest");
|
let x86_ptest = x86.by_name("x86_ptest");
|
||||||
let x86_punpckh = x86.by_name("x86_punpckh");
|
let x86_punpckh = x86.by_name("x86_punpckh");
|
||||||
let x86_punpckl = x86.by_name("x86_punpckl");
|
let x86_punpckl = x86.by_name("x86_punpckl");
|
||||||
|
let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps");
|
||||||
|
|
||||||
// Shorthands for recipes.
|
// Shorthands for recipes.
|
||||||
let rec_blend = r.template("blend");
|
let rec_blend = r.template("blend");
|
||||||
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
||||||
|
let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128");
|
||||||
let rec_f_ib = r.template("f_ib");
|
let rec_f_ib = r.template("f_ib");
|
||||||
let rec_fa = r.template("fa");
|
let rec_fa = r.template("fa");
|
||||||
let rec_fa_ib = r.template("fa_ib");
|
let rec_fa_ib = r.template("fa_ib");
|
||||||
@@ -1702,6 +1704,7 @@ fn define_simd(
|
|||||||
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
||||||
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
|
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
|
||||||
let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
|
let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
|
||||||
|
let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd");
|
||||||
|
|
||||||
// SIMD vector size: eventually multiple vector sizes may be supported but for now only
|
// SIMD vector size: eventually multiple vector sizes may be supported but for now only
|
||||||
// SSE-sized vectors are available.
|
// SSE-sized vectors are available.
|
||||||
@@ -1885,6 +1888,12 @@ fn define_simd(
|
|||||||
.bind(vector(F32, sse_vector_size))
|
.bind(vector(F32, sse_vector_size))
|
||||||
.bind(vector(I32, sse_vector_size));
|
.bind(vector(I32, sse_vector_size));
|
||||||
e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
|
e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
|
||||||
|
|
||||||
|
e.enc_32_64_maybe_isap(
|
||||||
|
x86_vcvtudq2ps,
|
||||||
|
rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS),
|
||||||
|
Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD vconst for special cases (all zeroes, all ones)
|
// SIMD vconst for special cases (all zeroes, all ones)
|
||||||
|
|||||||
@@ -145,6 +145,37 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let f32x4 = &TypeVar::new(
|
||||||
|
"f32x4",
|
||||||
|
"A floating point number",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.floats(32..32)
|
||||||
|
.simd_lanes(4..4)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
let i32x4 = &TypeVar::new(
|
||||||
|
"i32x4",
|
||||||
|
"An integer type with the same number of lanes",
|
||||||
|
TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
|
||||||
|
);
|
||||||
|
let x = &Operand::new("x", i32x4);
|
||||||
|
let a = &Operand::new("a", f32x4);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_vcvtudq2ps",
|
||||||
|
r#"
|
||||||
|
Convert unsigned integer to floating point.
|
||||||
|
|
||||||
|
Convert packed doubleword unsigned integers to packed single-precision floating-point
|
||||||
|
values. This instruction does not trap.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", Float);
|
let x = &Operand::new("x", Float);
|
||||||
let a = &Operand::new("a", Float);
|
let a = &Operand::new("a", Float);
|
||||||
let y = &Operand::new("y", Float);
|
let y = &Operand::new("y", Float);
|
||||||
|
|||||||
@@ -665,6 +665,12 @@ pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
|
|||||||
/// Raise invalid opcode instruction.
|
/// Raise invalid opcode instruction.
|
||||||
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
|
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
|
||||||
|
|
||||||
|
/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
|
||||||
|
/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
|
||||||
|
/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode
|
||||||
|
/// (AVX512VL, AVX512F).
|
||||||
|
pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
|
||||||
|
|
||||||
/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
|
/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
|
||||||
pub static XOR_IMM: [u8; 1] = [0x81];
|
pub static XOR_IMM: [u8; 1] = [0x81];
|
||||||
|
|
||||||
|
|||||||
@@ -3417,5 +3417,23 @@ pub(crate) fn define<'shared>(
|
|||||||
regs).rex_kind(RecipePrefixKind::Evex)
|
regs).rex_kind(RecipePrefixKind::Evex)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
recipes.add_template(
|
||||||
|
Template::new(
|
||||||
|
EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1)
|
||||||
|
.operands_in(vec![fpr])
|
||||||
|
.operands_out(vec![fpr])
|
||||||
|
.emit(
|
||||||
|
r#"
|
||||||
|
// instruction encoding operands: reg (op1, w), rm (op2, r)
|
||||||
|
// this maps to: out_reg0, in_reg0
|
||||||
|
let context = EvexContext::Other { length: EvexVectorLength::V128 };
|
||||||
|
let masking = EvexMasking::None;
|
||||||
|
put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm
|
||||||
|
modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
regs).rex_kind(RecipePrefixKind::Evex)
|
||||||
|
);
|
||||||
|
|
||||||
recipes
|
recipes
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2066,6 +2066,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::X86Packss
|
| Opcode::X86Packss
|
||||||
| Opcode::X86Punpckh
|
| Opcode::X86Punpckh
|
||||||
| Opcode::X86Punpckl
|
| Opcode::X86Punpckl
|
||||||
|
| Opcode::X86Vcvtudq2ps
|
||||||
| Opcode::X86ElfTlsGetAddr
|
| Opcode::X86ElfTlsGetAddr
|
||||||
| Opcode::X86MachoTlsGetAddr => {
|
| Opcode::X86MachoTlsGetAddr => {
|
||||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
test binemit
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 has_avx512vl=true
|
||||||
|
|
||||||
|
function %fcvt_from_uint(i32x4) {
|
||||||
|
block0(v0: i32x4 [%xmm2]):
|
||||||
|
[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2
|
||||||
|
return
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user