Add x86 complex encodings for SIMD load-extend instructions
This commit is contained in:
@@ -1607,8 +1607,11 @@ fn define_simd(
|
|||||||
let sadd_sat = shared.by_name("sadd_sat");
|
let sadd_sat = shared.by_name("sadd_sat");
|
||||||
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
||||||
let sload8x8 = shared.by_name("sload8x8");
|
let sload8x8 = shared.by_name("sload8x8");
|
||||||
|
let sload8x8_complex = shared.by_name("sload8x8_complex");
|
||||||
let sload16x4 = shared.by_name("sload16x4");
|
let sload16x4 = shared.by_name("sload16x4");
|
||||||
|
let sload16x4_complex = shared.by_name("sload16x4_complex");
|
||||||
let sload32x2 = shared.by_name("sload32x2");
|
let sload32x2 = shared.by_name("sload32x2");
|
||||||
|
let sload32x2_complex = shared.by_name("sload32x2_complex");
|
||||||
let spill = shared.by_name("spill");
|
let spill = shared.by_name("spill");
|
||||||
let sqrt = shared.by_name("sqrt");
|
let sqrt = shared.by_name("sqrt");
|
||||||
let sshr_imm = shared.by_name("sshr_imm");
|
let sshr_imm = shared.by_name("sshr_imm");
|
||||||
@@ -1617,8 +1620,11 @@ fn define_simd(
|
|||||||
let store_complex = shared.by_name("store_complex");
|
let store_complex = shared.by_name("store_complex");
|
||||||
let uadd_sat = shared.by_name("uadd_sat");
|
let uadd_sat = shared.by_name("uadd_sat");
|
||||||
let uload8x8 = shared.by_name("uload8x8");
|
let uload8x8 = shared.by_name("uload8x8");
|
||||||
|
let uload8x8_complex = shared.by_name("uload8x8_complex");
|
||||||
let uload16x4 = shared.by_name("uload16x4");
|
let uload16x4 = shared.by_name("uload16x4");
|
||||||
|
let uload16x4_complex = shared.by_name("uload16x4_complex");
|
||||||
let uload32x2 = shared.by_name("uload32x2");
|
let uload32x2 = shared.by_name("uload32x2");
|
||||||
|
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
let usub_sat = shared.by_name("usub_sat");
|
let usub_sat = shared.by_name("usub_sat");
|
||||||
let vconst = shared.by_name("vconst");
|
let vconst = shared.by_name("vconst");
|
||||||
@@ -1983,6 +1989,35 @@ fn define_simd(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD load extend (complex addressing)
|
||||||
|
let is_load_complex_length_two =
|
||||||
|
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
|
||||||
|
for (inst, opcodes) in &[
|
||||||
|
(uload8x8_complex, &PMOVZXBW),
|
||||||
|
(uload16x4_complex, &PMOVZXWD),
|
||||||
|
(uload32x2_complex, &PMOVZXDQ),
|
||||||
|
(sload8x8_complex, &PMOVSXBW),
|
||||||
|
(sload16x4_complex, &PMOVSXWD),
|
||||||
|
(sload32x2_complex, &PMOVSXDQ),
|
||||||
|
] {
|
||||||
|
for recipe in &[
|
||||||
|
rec_fldWithIndex,
|
||||||
|
rec_fldWithIndexDisp8,
|
||||||
|
rec_fldWithIndexDisp32,
|
||||||
|
] {
|
||||||
|
let template = recipe.opcodes(*opcodes);
|
||||||
|
let predicate = |encoding: EncodingBuilder| {
|
||||||
|
encoding
|
||||||
|
.isa_predicate(use_sse41_simd)
|
||||||
|
.inst_predicate(is_load_complex_length_two.clone())
|
||||||
|
};
|
||||||
|
e.enc32_func(inst.clone(), template.clone(), predicate);
|
||||||
|
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
|
||||||
|
e.enc64_func(inst.clone(), template.rex(), predicate);
|
||||||
|
e.enc64_func(inst.clone(), template, predicate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD integer addition
|
// SIMD integer addition
|
||||||
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
|
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
|
||||||
let iadd = iadd.bind(vector(*ty, sse_vector_size));
|
let iadd = iadd.bind(vector(*ty, sse_vector_size));
|
||||||
|
|||||||
@@ -1172,6 +1172,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uload8x8_complex",
|
||||||
|
r#"
|
||||||
|
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||||
|
i16x8 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"sload8x8",
|
"sload8x8",
|
||||||
@@ -1186,6 +1200,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"sload8x8_complex",
|
||||||
|
r#"
|
||||||
|
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||||
|
i16x8 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
let I32x4 = &TypeVar::new(
|
let I32x4 = &TypeVar::new(
|
||||||
"I32x4",
|
"I32x4",
|
||||||
"A SIMD vector with exactly 4 lanes of 32-bit values",
|
"A SIMD vector with exactly 4 lanes of 32-bit values",
|
||||||
@@ -1201,7 +1229,7 @@ pub(crate) fn define(
|
|||||||
Inst::new(
|
Inst::new(
|
||||||
"uload16x4",
|
"uload16x4",
|
||||||
r#"
|
r#"
|
||||||
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
|
||||||
vector.
|
vector.
|
||||||
"#,
|
"#,
|
||||||
&formats.load,
|
&formats.load,
|
||||||
@@ -1211,6 +1239,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uload16x4_complex",
|
||||||
|
r#"
|
||||||
|
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||||
|
i32x4 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"sload16x4",
|
"sload16x4",
|
||||||
@@ -1225,6 +1267,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"sload16x4_complex",
|
||||||
|
r#"
|
||||||
|
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||||
|
i32x4 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
let I64x2 = &TypeVar::new(
|
let I64x2 = &TypeVar::new(
|
||||||
"I64x2",
|
"I64x2",
|
||||||
"A SIMD vector with exactly 2 lanes of 64-bit values",
|
"A SIMD vector with exactly 2 lanes of 64-bit values",
|
||||||
@@ -1250,6 +1306,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"uload32x2_complex",
|
||||||
|
r#"
|
||||||
|
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
|
||||||
|
i64x2 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"sload32x2",
|
"sload32x2",
|
||||||
@@ -1264,6 +1334,20 @@ pub(crate) fn define(
|
|||||||
.can_load(true),
|
.can_load(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"sload32x2_complex",
|
||||||
|
r#"
|
||||||
|
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
|
||||||
|
i64x2 vector.
|
||||||
|
"#,
|
||||||
|
&formats.load_complex,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, args, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
|
let x = &Operand::new("x", Mem).with_doc("Value to be stored");
|
||||||
let a = &Operand::new("a", Mem).with_doc("Value loaded");
|
let a = &Operand::new("a", Mem).with_doc("Value loaded");
|
||||||
let Offset =
|
let Offset =
|
||||||
|
|||||||
@@ -1354,11 +1354,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
|
|||||||
| Opcode::ScalarToVector
|
| Opcode::ScalarToVector
|
||||||
| Opcode::Swizzle
|
| Opcode::Swizzle
|
||||||
| Opcode::Uload8x8
|
| Opcode::Uload8x8
|
||||||
|
| Opcode::Uload8x8Complex
|
||||||
| Opcode::Sload8x8
|
| Opcode::Sload8x8
|
||||||
|
| Opcode::Sload8x8Complex
|
||||||
| Opcode::Uload16x4
|
| Opcode::Uload16x4
|
||||||
|
| Opcode::Uload16x4Complex
|
||||||
| Opcode::Sload16x4
|
| Opcode::Sload16x4
|
||||||
|
| Opcode::Sload16x4Complex
|
||||||
| Opcode::Uload32x2
|
| Opcode::Uload32x2
|
||||||
| Opcode::Sload32x2 => {
|
| Opcode::Uload32x2Complex
|
||||||
|
| Opcode::Sload32x2
|
||||||
|
| Opcode::Sload32x2Complex => {
|
||||||
// TODO
|
// TODO
|
||||||
panic!("Vector ops not implemented.");
|
panic!("Vector ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user