Add x86 SIMD immediate shifts

This commit is contained in:
Andrew Brown
2019-11-11 11:55:04 -08:00
parent 6519a43b08
commit 1f17e35e95
5 changed files with 135 additions and 0 deletions

View File

@@ -553,6 +553,7 @@ pub(crate) fn define(
let rec_copysp = r.template("copysp");
let rec_div = r.template("div");
let rec_debugtrap = r.recipe("debugtrap");
let rec_f_ib = r.template("f_ib");
let rec_f32imm_z = r.template("f32imm_z");
let rec_f64imm_z = r.template("f64imm_z");
let rec_fa = r.template("fa");
@@ -2033,6 +2034,18 @@ pub(crate) fn define(
e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
}
// SIMD immediate shift
// Each lane type shares one opcode across all three shifts; the ModR/M RRR digit passed to
// `rrr(..)` distinguishes them: 6 for `ishl_imm`, 2 for `ushr_imm`, 4 for `sshr_imm`.
// NOTE(review): for I64 this registers PS_Q_IMM with digit 4 for `sshr_imm`; the Intel SDM
// opcode tables do not appear to define an SSE2 arithmetic quadword shift (no PSRAQ) --
// confirm, and if so exclude I64 from the `sshr_imm` encoding together with its binemit test.
for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
}
// SIMD integer comparisons
{
use IntCC::*;

View File

@@ -431,6 +431,18 @@ pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
/// store the result in xmm1 (SSE2).
pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
/// Shift words in xmm1 by imm8; the direction and sign-bit behavior are controlled by the
/// RRR digit used in the ModR/M byte (SSE2). The encodings pair this opcode with digit 6 for
/// left shifts, 2 for logical right shifts and 4 for arithmetic right shifts.
pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior are controlled by
/// the RRR digit used in the ModR/M byte (SSE2). The encodings pair this opcode with digit 6
/// for left shifts, 2 for logical right shifts and 4 for arithmetic right shifts.
pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior are controlled by the
/// RRR digit used in the ModR/M byte (SSE2).
/// NOTE(review): the Intel SDM appears to define only the logical forms for this opcode
/// (digit 2 = right, digit 6 = left); confirm that no arithmetic (digit 4) form exists in SSE2.
pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];

View File

@@ -792,6 +792,26 @@ pub(crate) fn define<'shared>(
),
);
// XX /n ib with an 8-bit immediate, operating on an `fpr` register operand.
// NOTE(review): `operands_out(vec![0])` presumably ties the result to input operand 0, and the
// predicate presumably restricts `imm` to values representable as a signed 8-bit integer --
// confirm against `EncodingRecipeBuilder` and `InstructionPredicate::new_is_signed_int`.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
.operands_in(vec![fpr])
.operands_out(vec![0])
.inst_predicate(InstructionPredicate::new_is_signed_int(
&*formats.binary_imm,
"imm",
8,
0,
))
// The template writes the opcode via `{{PUT_OP}}`, the ModR/M byte via `modrm_r_bits`, and
// then exactly one immediate byte (`imm as u8`).
.emit(
r#"
{{PUT_OP}}(bits, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put1(imm as u8);
"#,
),
);
// XX /n id with 32-bit immediate sign-extended.
recipes.add_template_recipe(
EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)

View File

@@ -49,3 +49,57 @@ ebb0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
[-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0
return v2
}
; ishl_imm.i16x8: opcode 66 0f 71, ModR/M 0xf2 (mod=11, reg=/6, rm=xmm2), imm8 0x03.
function %ishl_imm_i16x8(i16x8) -> i16x8 {
ebb0(v0: i16x8 [%xmm2]):
[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03
return v2
}
; ishl_imm.i32x4: opcode 66 0f 72, ModR/M 0xf4 (mod=11, reg=/6, rm=xmm4), imm8 0x0a (10).
function %ishl_imm_i32x4(i32x4) -> i32x4 {
ebb0(v0: i32x4 [%xmm4]):
[-, %xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a
return v2
}
; ishl_imm.i64x2: opcode 66 0f 73, ModR/M 0xf6 (mod=11, reg=/6, rm=xmm6), imm8 0x2a (42).
function %ishl_imm_i64x2(i64x2) -> i64x2 {
ebb0(v0: i64x2 [%xmm6]):
[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a
return v2
}
; ushr_imm.i16x8: opcode 66 0f 71, ModR/M 0xd2 (mod=11, reg=/2, rm=xmm2), imm8 0x03.
function %ushr_imm_i16x8(i16x8) -> i16x8 {
ebb0(v0: i16x8 [%xmm2]):
[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03
return v2
}
; ushr_imm.i32x4: opcode 66 0f 72, ModR/M 0xd4 (mod=11, reg=/2, rm=xmm4), imm8 0x0a (10).
function %ushr_imm_i32x4(i32x4) -> i32x4 {
ebb0(v0: i32x4 [%xmm4]):
[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a
return v2
}
; ushr_imm.i64x2: opcode 66 0f 73, ModR/M 0xd6 (mod=11, reg=/2, rm=xmm6), imm8 0x2a (42).
function %ushr_imm_i64x2(i64x2) -> i64x2 {
ebb0(v0: i64x2 [%xmm6]):
[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a
return v2
}
; sshr_imm.i16x8: opcode 66 0f 71, ModR/M 0xe2 (mod=11, reg=/4, rm=xmm2), imm8 0x03.
function %sshr_imm_i16x8(i16x8) -> i16x8 {
ebb0(v0: i16x8 [%xmm2]):
[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03
return v2
}
; sshr_imm.i32x4: opcode 66 0f 72, ModR/M 0xe4 (mod=11, reg=/4, rm=xmm4), imm8 0x0a (10).
function %sshr_imm_i32x4(i32x4) -> i32x4 {
ebb0(v0: i32x4 [%xmm4]):
[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a
return v2
}
; sshr_imm.i64x2: opcode 66 0f 73, ModR/M 0xe6 (mod=11, reg=/4, rm=xmm6), imm8 0x2a (42).
; NOTE(review): the Intel SDM does not appear to define digit /4 for opcode 66 0f 73 (there
; is no SSE2 PSRAQ); confirm whether these bytes decode to a valid instruction before
; relying on this expectation.
function %sshr_imm_i64x2(i64x2) -> i64x2 {
ebb0(v0: i64x2 [%xmm6]):
[-, %xmm6] v2 = sshr_imm v0, 42 ; bin: 66 0f 73 e6 2a
return v2
}

View File

@@ -127,3 +127,39 @@ ebb0:
return v11
}
; run
; Arithmetic right shift of i32x4 lanes by an immediate keeps the sign: -8 >> 1 == -4.
function %sshr_imm_i32x4() -> b1 {
ebb0:
v1 = vconst.i32x4 [1 2 4 -8]
v2 = sshr_imm v1, 1
v3 = vconst.i32x4 [0 1 2 -4]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
; Logical right shift of i16x8 lanes by an immediate: zeros are shifted in from the top, so
; the negative lane (-8) becomes a large positive value (32764) instead of staying negative.
; The function is named after the instruction it exercises (`ushr_imm`, not `sshr_imm`).
function %ushr_imm_i16x8() -> b1 {
ebb0:
v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
v2 = ushr_imm v1, 1
v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run
; Left shift of i64x2 lanes by an immediate: 1 << 1 == 2, and the zero lane stays zero.
function %ishl_imm_i64x2() -> b1 {
ebb0:
v1 = vconst.i64x2 [1 0]
v2 = ishl_imm v1, 1
v3 = vconst.i64x2 [2 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run