Add x86 encoding for vconst
This commit is contained in:
@@ -270,7 +270,7 @@ impl PerCpuModeEncodings {
|
|||||||
/// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
|
/// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
|
||||||
fn enc_32_64_maybe_isap(
|
fn enc_32_64_maybe_isap(
|
||||||
&mut self,
|
&mut self,
|
||||||
inst: BoundInstruction,
|
inst: impl Clone + Into<InstSpec>,
|
||||||
template: Template,
|
template: Template,
|
||||||
isap: Option<SettingPredicateNumber>,
|
isap: Option<SettingPredicateNumber>,
|
||||||
) {
|
) {
|
||||||
@@ -280,7 +280,7 @@ impl PerCpuModeEncodings {
|
|||||||
|
|
||||||
fn enc32_maybe_isap(
|
fn enc32_maybe_isap(
|
||||||
&mut self,
|
&mut self,
|
||||||
inst: BoundInstruction,
|
inst: impl Into<InstSpec>,
|
||||||
template: Template,
|
template: Template,
|
||||||
isap: Option<SettingPredicateNumber>,
|
isap: Option<SettingPredicateNumber>,
|
||||||
) {
|
) {
|
||||||
@@ -292,7 +292,7 @@ impl PerCpuModeEncodings {
|
|||||||
|
|
||||||
fn enc64_maybe_isap(
|
fn enc64_maybe_isap(
|
||||||
&mut self,
|
&mut self,
|
||||||
inst: BoundInstruction,
|
inst: impl Into<InstSpec>,
|
||||||
template: Template,
|
template: Template,
|
||||||
isap: Option<SettingPredicateNumber>,
|
isap: Option<SettingPredicateNumber>,
|
||||||
) {
|
) {
|
||||||
@@ -432,6 +432,7 @@ pub fn define(
|
|||||||
let uload8_complex = shared.by_name("uload8_complex");
|
let uload8_complex = shared.by_name("uload8_complex");
|
||||||
let ushr = shared.by_name("ushr");
|
let ushr = shared.by_name("ushr");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
|
let vconst = shared.by_name("vconst");
|
||||||
let x86_bsf = x86.by_name("x86_bsf");
|
let x86_bsf = x86.by_name("x86_bsf");
|
||||||
let x86_bsr = x86.by_name("x86_bsr");
|
let x86_bsr = x86.by_name("x86_bsr");
|
||||||
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
|
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
|
||||||
@@ -578,6 +579,7 @@ pub fn define(
|
|||||||
let rec_urm = r.template("urm");
|
let rec_urm = r.template("urm");
|
||||||
let rec_urm_noflags = r.template("urm_noflags");
|
let rec_urm_noflags = r.template("urm_noflags");
|
||||||
let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
|
let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
|
||||||
|
let rec_vconst = r.template("vconst");
|
||||||
|
|
||||||
// Predicates shorthands.
|
// Predicates shorthands.
|
||||||
let all_ones_funcaddrs_and_not_is_pic =
|
let all_ones_funcaddrs_and_not_is_pic =
|
||||||
@@ -1785,6 +1787,18 @@ pub fn define(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD vconst using MOVUPS
|
||||||
|
// TODO: it would be ideal if eventually this became the more efficient MOVAPS but we would have
|
||||||
|
// to guarantee that the constants are aligned when emitted and there is currently no mechanism
|
||||||
|
// for that; alternatively, constants could be loaded into XMM registers using a sequence like:
|
||||||
|
// MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
|
||||||
|
// in memory) but some performance measurements are needed.
|
||||||
|
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
|
||||||
|
let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
|
let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]);
|
||||||
|
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
|
||||||
|
}
|
||||||
|
|
||||||
// Reference type instructions
|
// Reference type instructions
|
||||||
|
|
||||||
// Null references implemented as iconst 0.
|
// Null references implemented as iconst 0.
|
||||||
|
|||||||
16
cranelift/filetests/filetests/isa/x86/compile-vconst.clif
Normal file
16
cranelift/filetests/filetests/isa/x86/compile-vconst.clif
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
test compile
|
||||||
|
set enable_simd=true
|
||||||
|
set probestack_enabled=false
|
||||||
|
target x86_64 haswell
|
||||||
|
|
||||||
|
; use baldrdash calling convention here for simplicity (avoids prologue, epilogue)
|
||||||
|
function %test_vconst_i32() -> i32x4 baldrdash_system_v {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i32x4 0x1234
|
||||||
|
return v0
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ebb0:
|
||||||
|
; nextln: v0 = vconst.i32x4 0x1234
|
||||||
|
; nextln: return v0
|
||||||
|
; nextln: }
|
||||||
11
cranelift/filetests/filetests/isa/x86/vconst.clif
Normal file
11
cranelift/filetests/filetests/isa/x86/vconst.clif
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
test binemit
|
||||||
|
set opt_level=best
|
||||||
|
set enable_simd
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function %test_vconst_b8() {
|
||||||
|
ebb0:
|
||||||
|
[-, %xmm2] v0 = vconst.b8x16 0x00 ; bin: 0f 10 15 00000008 PCRelRodata4(15)
|
||||||
|
[-, %xmm3] v1 = vconst.b8x16 0x01 ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
|
||||||
|
return
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user