Encode [u|s]widen_low for x86
This commit is contained in:
@@ -1669,6 +1669,7 @@ fn define_simd(
|
|||||||
let ssub_sat = shared.by_name("ssub_sat");
|
let ssub_sat = shared.by_name("ssub_sat");
|
||||||
let store = shared.by_name("store");
|
let store = shared.by_name("store");
|
||||||
let store_complex = shared.by_name("store_complex");
|
let store_complex = shared.by_name("store_complex");
|
||||||
|
let swiden_low = shared.by_name("swiden_low");
|
||||||
let uadd_sat = shared.by_name("uadd_sat");
|
let uadd_sat = shared.by_name("uadd_sat");
|
||||||
let uload8x8 = shared.by_name("uload8x8");
|
let uload8x8 = shared.by_name("uload8x8");
|
||||||
let uload8x8_complex = shared.by_name("uload8x8_complex");
|
let uload8x8_complex = shared.by_name("uload8x8_complex");
|
||||||
@@ -1678,6 +1679,7 @@ fn define_simd(
|
|||||||
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||||
let snarrow = shared.by_name("snarrow");
|
let snarrow = shared.by_name("snarrow");
|
||||||
let unarrow = shared.by_name("unarrow");
|
let unarrow = shared.by_name("unarrow");
|
||||||
|
let uwiden_low = shared.by_name("uwiden_low");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
let usub_sat = shared.by_name("usub_sat");
|
let usub_sat = shared.by_name("usub_sat");
|
||||||
let vconst = shared.by_name("vconst");
|
let vconst = shared.by_name("vconst");
|
||||||
@@ -1915,6 +1917,16 @@ fn define_simd(
|
|||||||
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
|
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
|
||||||
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
|
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
|
||||||
}
|
}
|
||||||
|
for (ty, swiden_opcode, uwiden_opcode) in &[
|
||||||
|
(I8, &PMOVSXBW[..], &PMOVZXBW[..]),
|
||||||
|
(I16, &PMOVSXWD[..], &PMOVZXWD[..]),
|
||||||
|
] {
|
||||||
|
let isap = Some(use_sse41_simd);
|
||||||
|
let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
|
||||||
|
e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
|
||||||
|
let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
|
||||||
|
e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
|
||||||
|
}
|
||||||
for ty in &[I8, I16, I32, I64] {
|
for ty in &[I8, I16, I32, I64] {
|
||||||
e.enc_both_inferred_maybe_isap(
|
e.enc_both_inferred_maybe_isap(
|
||||||
x86_palignr.bind(vector(*ty, sse_vector_size)),
|
x86_palignr.bind(vector(*ty, sse_vector_size)),
|
||||||
|
|||||||
@@ -477,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
|
|||||||
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
|
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
|
||||||
|
|
||||||
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||||
/// integers in xmm1.
|
/// integers in xmm1 (SSE4.1).
|
||||||
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
|
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
|
||||||
|
|
||||||
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||||
@@ -489,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
|
|||||||
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
|
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
|
||||||
|
|
||||||
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||||
/// integers in xmm1.
|
/// integers in xmm1 (SSE4.1).
|
||||||
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
|
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
|
||||||
|
|
||||||
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
||||||
|
|||||||
@@ -3883,9 +3883,9 @@ pub(crate) fn define(
|
|||||||
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
|
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
|
||||||
);
|
);
|
||||||
|
|
||||||
let I16xN = &TypeVar::new(
|
let I16or32xN = &TypeVar::new(
|
||||||
"I16xN",
|
"I16or32xN",
|
||||||
"A SIMD vector type containing integers 16-bits wide and up",
|
"A SIMD vector type containing integer lanes 16 or 32 bits wide",
|
||||||
TypeSetBuilder::new()
|
TypeSetBuilder::new()
|
||||||
.ints(16..32)
|
.ints(16..32)
|
||||||
.simd_lanes(4..8)
|
.simd_lanes(4..8)
|
||||||
@@ -3893,9 +3893,9 @@ pub(crate) fn define(
|
|||||||
.build(),
|
.build(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", I16xN);
|
let x = &Operand::new("x", I16or32xN);
|
||||||
let y = &Operand::new("y", I16xN);
|
let y = &Operand::new("y", I16or32xN);
|
||||||
let a = &Operand::new("a", &I16xN.split_lanes());
|
let a = &Operand::new("a", &I16or32xN.split_lanes());
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
@@ -3934,6 +3934,75 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let I8or16xN = &TypeVar::new(
|
||||||
|
"I8or16xN",
|
||||||
|
"A SIMD vector type containing integer lanes 8 or 16 bits wide.",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.ints(8..16)
|
||||||
|
.simd_lanes(8..16)
|
||||||
|
.includes_scalars(false)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", I8or16xN);
|
||||||
|
let a = &Operand::new("a", &I8or16xN.merge_lanes());
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"swiden_low",
|
||||||
|
r#"
|
||||||
|
Widen the low lanes of `x` using signed extension.
|
||||||
|
|
||||||
|
This will double the lane width and halve the number of lanes.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"swiden_high",
|
||||||
|
r#"
|
||||||
|
Widen the high lanes of `x` using signed extension.
|
||||||
|
|
||||||
|
This will double the lane width and halve the number of lanes.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uwiden_low",
|
||||||
|
r#"
|
||||||
|
Widen the low lanes of `x` using unsigned extension.
|
||||||
|
|
||||||
|
This will double the lane width and halve the number of lanes.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uwiden_high",
|
||||||
|
r#"
|
||||||
|
Widen the high lanes of `x` using unsigned extension.
|
||||||
|
|
||||||
|
This will double the lane width and halve the number of lanes.
|
||||||
|
"#,
|
||||||
|
&formats.unary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let IntTo = &TypeVar::new(
|
let IntTo = &TypeVar::new(
|
||||||
"IntTo",
|
"IntTo",
|
||||||
"A larger integer type with the same number of lanes",
|
"A larger integer type with the same number of lanes",
|
||||||
|
|||||||
@@ -2154,7 +2154,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::AvgRound => unimplemented!(),
|
Opcode::AvgRound => unimplemented!(),
|
||||||
Opcode::Iabs => unimplemented!(),
|
Opcode::Iabs => unimplemented!(),
|
||||||
Opcode::Snarrow | Opcode::Unarrow => unimplemented!(),
|
Opcode::Snarrow
|
||||||
|
| Opcode::Unarrow
|
||||||
|
| Opcode::SwidenLow
|
||||||
|
| Opcode::SwidenHigh
|
||||||
|
| Opcode::UwidenLow
|
||||||
|
| Opcode::UwidenHigh => unimplemented!(),
|
||||||
Opcode::TlsValue => unimplemented!(),
|
Opcode::TlsValue => unimplemented!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
test binemit
|
test binemit
|
||||||
set enable_simd
|
set enable_simd
|
||||||
target x86_64 has_ssse3=true
|
target x86_64 nehalem
|
||||||
|
|
||||||
; Ensure raw_bitcast emits no instructions.
|
; Ensure raw_bitcast emits no instructions.
|
||||||
function %raw_bitcast_i16x8_to_b32x4() {
|
function %raw_bitcast_i16x8_to_b32x4() {
|
||||||
@@ -17,3 +17,10 @@ block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
|
|||||||
[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
|
[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %conversions_i16x8(i16x8) {
|
||||||
|
block0(v0: i16x8 [%xmm6]):
|
||||||
|
[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6
|
||||||
|
[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user