Add x86 implementation of extractlane instruction
This commit is contained in:
@@ -318,6 +318,7 @@ pub fn define(
|
|||||||
let copy_special = shared.by_name("copy_special");
|
let copy_special = shared.by_name("copy_special");
|
||||||
let ctz = shared.by_name("ctz");
|
let ctz = shared.by_name("ctz");
|
||||||
let debugtrap = shared.by_name("debugtrap");
|
let debugtrap = shared.by_name("debugtrap");
|
||||||
|
let extractlane = shared.by_name("extractlane");
|
||||||
let f32const = shared.by_name("f32const");
|
let f32const = shared.by_name("f32const");
|
||||||
let f64const = shared.by_name("f64const");
|
let f64const = shared.by_name("f64const");
|
||||||
let fadd = shared.by_name("fadd");
|
let fadd = shared.by_name("fadd");
|
||||||
@@ -498,7 +499,8 @@ pub fn define(
|
|||||||
let rec_pushq = r.template("pushq");
|
let rec_pushq = r.template("pushq");
|
||||||
let rec_ret = r.template("ret");
|
let rec_ret = r.template("ret");
|
||||||
let rec_r_ib = r.template("r_ib");
|
let rec_r_ib = r.template("r_ib");
|
||||||
let rec_r_ib_unsigned = r.template("r_ib_unsigned");
|
let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
|
||||||
|
let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
|
||||||
let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
|
let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
|
||||||
let rec_r_id = r.template("r_id");
|
let rec_r_id = r.template("r_id");
|
||||||
let rec_rcmp = r.template("rcmp");
|
let rec_rcmp = r.template("rcmp");
|
||||||
@@ -1642,7 +1644,9 @@ pub fn define(
|
|||||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
|
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
|
||||||
let number_of_lanes = 128 / ty.lane_bits();
|
let number_of_lanes = 128 / ty.lane_bits();
|
||||||
let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
|
let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
|
||||||
let template = rec_r_ib_unsigned.nonrex().opcodes(vec![0x66, 0x0f, 0x70]);
|
let template = rec_r_ib_unsigned_fpr
|
||||||
|
.nonrex()
|
||||||
|
.opcodes(vec![0x66, 0x0f, 0x70]);
|
||||||
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
|
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
|
||||||
e.enc64_isap(instruction, template, use_sse2);
|
e.enc64_isap(instruction, template, use_sse2);
|
||||||
}
|
}
|
||||||
@@ -1682,6 +1686,27 @@ pub fn define(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD extractlane
|
||||||
|
let mut extractlane_mapping: HashMap<u64, (Vec<u8>, SettingPredicateNumber)> = HashMap::new();
|
||||||
|
extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], use_sse41)); // PEXTRB
|
||||||
|
extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], use_sse2)); // PEXTRW, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes
|
||||||
|
extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // PEXTRD
|
||||||
|
extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // PEXTRQ, only x86_64
|
||||||
|
|
||||||
|
for ty in ValueType::all_lane_types() {
|
||||||
|
if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) {
|
||||||
|
let number_of_lanes = 128 / ty.lane_bits();
|
||||||
|
let instruction = extractlane.bind_vector(ty, number_of_lanes);
|
||||||
|
let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone());
|
||||||
|
if ty.lane_bits() < 64 {
|
||||||
|
e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
|
||||||
|
} else {
|
||||||
|
// the 64-bit lane width uses a REX.W encoding and is therefore only available on x86_64
|
||||||
|
e.enc64_isap(instruction, template.rex().w(), isap.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
|
// SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
|
||||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
|
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
|
||||||
let instruction = bitcast.bind_vector(ty, 16).bind(F64);
|
let instruction = bitcast.bind_vector(ty, 16).bind(F64);
|
||||||
|
|||||||
@@ -800,7 +800,7 @@ pub fn define<'shared>(
|
|||||||
{
|
{
|
||||||
let format = formats.get(f_extract_lane);
|
let format = formats.get(f_extract_lane);
|
||||||
recipes.add_template_recipe(
|
recipes.add_template_recipe(
|
||||||
EncodingRecipeBuilder::new("r_ib_unsigned", f_extract_lane, 2)
|
EncodingRecipeBuilder::new("r_ib_unsigned_fpr", f_extract_lane, 2)
|
||||||
.operands_in(vec![fpr])
|
.operands_in(vec![fpr])
|
||||||
.operands_out(vec![fpr])
|
.operands_out(vec![fpr])
|
||||||
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||||
@@ -817,6 +817,27 @@ pub fn define<'shared>(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
|
||||||
|
{
|
||||||
|
let format = formats.get(f_extract_lane);
|
||||||
|
recipes.add_template_recipe(
|
||||||
|
EncodingRecipeBuilder::new("r_ib_unsigned_gpr", f_extract_lane, 2)
|
||||||
|
.operands_in(vec![fpr])
|
||||||
|
.operands_out(vec![gpr])
|
||||||
|
.inst_predicate(InstructionPredicate::new_is_unsigned_int(
|
||||||
|
format, "lane", 8, 0,
|
||||||
|
))
|
||||||
|
.emit(
|
||||||
|
r#"
|
||||||
|
{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
|
||||||
|
modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
|
||||||
|
let imm:i64 = lane.into();
|
||||||
|
sink.put1(imm as u8);
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
|
// XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
|
||||||
{
|
{
|
||||||
let format = formats.get(f_insert_lane);
|
let format = formats.get(f_insert_lane);
|
||||||
|
|||||||
35
cranelift/filetests/filetests/isa/x86/extractlane.clif
Normal file
35
cranelift/filetests/filetests/isa/x86/extractlane.clif
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
test binemit
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 haswell
|
||||||
|
|
||||||
|
function %test_extractlane_b8() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rax] v0 = bconst.b8 true
|
||||||
|
[-, %xmm0] v1 = splat.b8x16 v0
|
||||||
|
[-, %rax] v2 = extractlane v1, 10 ; bin: 66 0f 3a 14 c0 0a
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_extractlane_i16() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rax] v0 = iconst.i16 4
|
||||||
|
[-, %xmm1] v1 = splat.i16x8 v0
|
||||||
|
[-, %rax] v2 = extractlane v1, 4 ; bin: 66 0f c5 c8 04
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_extractlane_i32() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rax] v0 = iconst.i32 42
|
||||||
|
[-, %xmm4] v1 = splat.i32x4 v0
|
||||||
|
[-, %rcx] v2 = extractlane v1, 2 ; bin: 66 0f 3a 16 e1 02
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_extractlane_f64() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rax] v0 = f64const 0x0.0
|
||||||
|
[-, %xmm2] v1 = splat.f64x2 v0
|
||||||
|
[-, %rbx] v2 = extractlane v1, 1 ; bin: 66 48 0f 3a 16 d3 01
|
||||||
|
return
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user