Add x86 complex encodings for SIMD load-extend instructions

This commit is contained in:
Andrew Brown
2020-04-30 10:20:59 -07:00
parent 38dff29179
commit a312506262
3 changed files with 127 additions and 2 deletions

View File

@@ -1607,8 +1607,11 @@ fn define_simd(
let sadd_sat = shared.by_name("sadd_sat"); let sadd_sat = shared.by_name("sadd_sat");
let scalar_to_vector = shared.by_name("scalar_to_vector"); let scalar_to_vector = shared.by_name("scalar_to_vector");
let sload8x8 = shared.by_name("sload8x8"); let sload8x8 = shared.by_name("sload8x8");
let sload8x8_complex = shared.by_name("sload8x8_complex");
let sload16x4 = shared.by_name("sload16x4"); let sload16x4 = shared.by_name("sload16x4");
let sload16x4_complex = shared.by_name("sload16x4_complex");
let sload32x2 = shared.by_name("sload32x2"); let sload32x2 = shared.by_name("sload32x2");
let sload32x2_complex = shared.by_name("sload32x2_complex");
let spill = shared.by_name("spill"); let spill = shared.by_name("spill");
let sqrt = shared.by_name("sqrt"); let sqrt = shared.by_name("sqrt");
let sshr_imm = shared.by_name("sshr_imm"); let sshr_imm = shared.by_name("sshr_imm");
@@ -1617,8 +1620,11 @@ fn define_simd(
let store_complex = shared.by_name("store_complex"); let store_complex = shared.by_name("store_complex");
let uadd_sat = shared.by_name("uadd_sat"); let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8"); let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
let uload16x4 = shared.by_name("uload16x4"); let uload16x4 = shared.by_name("uload16x4");
let uload16x4_complex = shared.by_name("uload16x4_complex");
let uload32x2 = shared.by_name("uload32x2"); let uload32x2 = shared.by_name("uload32x2");
let uload32x2_complex = shared.by_name("uload32x2_complex");
let ushr_imm = shared.by_name("ushr_imm"); let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat"); let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst"); let vconst = shared.by_name("vconst");
@@ -1983,6 +1989,35 @@ fn define_simd(
} }
} }
// SIMD load extend (complex addressing)
// Registers encodings for the six `*_complex` load-extend instructions, pairing each one with
// its PMOVZX*/PMOVSX* opcode constant and emitting one encoding per recipe listed below.
// Instruction predicate: only `load_complex` instructions whose length equals 2 match.
// NOTE(review): presumably this is the number of address arguments (base + index) -- confirm.
let is_load_complex_length_two =
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
for (inst, opcodes) in &[
(uload8x8_complex, &PMOVZXBW),
(uload16x4_complex, &PMOVZXWD),
(uload32x2_complex, &PMOVZXDQ),
(sload8x8_complex, &PMOVSXBW),
(sload16x4_complex, &PMOVSXWD),
(sload32x2_complex, &PMOVSXDQ),
] {
// NOTE(review): the recipe names suggest index-only, index + 8-bit displacement and
// index + 32-bit displacement addressing forms -- confirm against the recipe definitions.
for recipe in &[
rec_fldWithIndex,
rec_fldWithIndexDisp8,
rec_fldWithIndexDisp32,
] {
let template = recipe.opcodes(*opcodes);
// Every encoding is gated on the `use_sse41_simd` ISA predicate and on the length
// predicate built above; the closure is reused for all three registrations below.
let predicate = |encoding: EncodingBuilder| {
encoding
.isa_predicate(use_sse41_simd)
.inst_predicate(is_load_complex_length_two.clone())
};
// Registered through `enc32_func` (presumably the 32-bit target); the two `enc64_func`
// calls that follow add the REX-prefixed template first, then the plain template.
e.enc32_func(inst.clone(), template.clone(), predicate);
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
e.enc64_func(inst.clone(), template.rex(), predicate);
e.enc64_func(inst.clone(), template, predicate);
}
}
// SIMD integer addition // SIMD integer addition
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
let iadd = iadd.bind(vector(*ty, sse_vector_size)); let iadd = iadd.bind(vector(*ty, sse_vector_size));

View File

@@ -1172,6 +1172,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `uload8x8_complex`: a zero-extending 8x8 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"uload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push( ig.push(
Inst::new( Inst::new(
"sload8x8", "sload8x8",
@@ -1186,6 +1200,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `sload8x8_complex`: a sign-extending 8x8 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"sload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let I32x4 = &TypeVar::new( let I32x4 = &TypeVar::new(
"I32x4", "I32x4",
"A SIMD vector with exactly 4 lanes of 32-bit values", "A SIMD vector with exactly 4 lanes of 32-bit values",
@@ -1201,7 +1229,7 @@ pub(crate) fn define(
Inst::new( Inst::new(
"uload16x4", "uload16x4",
r#" r#"
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4 Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
vector. vector.
"#, "#,
&formats.load, &formats.load,
@@ -1211,6 +1239,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `uload16x4_complex`: a zero-extending 16x4 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"uload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push( ig.push(
Inst::new( Inst::new(
"sload16x4", "sload16x4",
@@ -1225,6 +1267,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `sload16x4_complex`: a sign-extending 16x4 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"sload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let I64x2 = &TypeVar::new( let I64x2 = &TypeVar::new(
"I64x2", "I64x2",
"A SIMD vector with exactly 2 lanes of 64-bit values", "A SIMD vector with exactly 2 lanes of 64-bit values",
@@ -1250,6 +1306,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `uload32x2_complex`: a zero-extending 32x2 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"uload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
ig.push( ig.push(
Inst::new( Inst::new(
"sload32x2", "sload32x2",
@@ -1264,6 +1334,20 @@ pub(crate) fn define(
.can_load(true), .can_load(true),
); );
// Define `sload32x2_complex`: a sign-extending 32x2 load using the `load_complex` format, whose
// address is `sum(args) + Offset`. Inputs are `MemFlags`, `args` and `Offset`; the output is
// `a`; the instruction is flagged as able to load from memory.
ig.push(
Inst::new(
"sload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);
let x = &Operand::new("x", Mem).with_doc("Value to be stored"); let x = &Operand::new("x", Mem).with_doc("Value to be stored");
let a = &Operand::new("a", Mem).with_doc("Value loaded"); let a = &Operand::new("a", Mem).with_doc("Value loaded");
let Offset = let Offset =

View File

@@ -1354,11 +1354,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRIns
| Opcode::ScalarToVector | Opcode::ScalarToVector
| Opcode::Swizzle | Opcode::Swizzle
| Opcode::Uload8x8 | Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8 | Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4 | Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4 | Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2 | Opcode::Uload32x2
| Opcode::Sload32x2 => { | Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => {
// TODO // TODO
panic!("Vector ops not implemented."); panic!("Vector ops not implemented.");
} }