Add x86 implementation of extractlane instruction

This commit is contained in:
Andrew Brown
2019-07-11 15:49:28 -07:00
committed by Dan Gohman
parent 036b9aea94
commit 3fdc78174f
3 changed files with 84 additions and 3 deletions

View File

@@ -318,6 +318,7 @@ pub fn define(
let copy_special = shared.by_name("copy_special");
let ctz = shared.by_name("ctz");
let debugtrap = shared.by_name("debugtrap");
let extractlane = shared.by_name("extractlane");
let f32const = shared.by_name("f32const");
let f64const = shared.by_name("f64const");
let fadd = shared.by_name("fadd");
@@ -498,7 +499,8 @@ pub fn define(
let rec_pushq = r.template("pushq");
let rec_ret = r.template("ret");
let rec_r_ib = r.template("r_ib");
let rec_r_ib_unsigned = r.template("r_ib_unsigned");
let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
let rec_r_id = r.template("r_id");
let rec_rcmp = r.template("rcmp");
@@ -1642,7 +1644,9 @@ pub fn define(
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
let template = rec_r_ib_unsigned.nonrex().opcodes(vec![0x66, 0x0f, 0x70]);
let template = rec_r_ib_unsigned_fpr
.nonrex()
.opcodes(vec![0x66, 0x0f, 0x70]);
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
e.enc64_isap(instruction, template, use_sse2);
}
@@ -1682,6 +1686,27 @@ pub fn define(
}
}
// SIMD extractlane
let mut extractlane_mapping: HashMap<u64, (Vec<u8>, SettingPredicateNumber)> = HashMap::new();
extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], use_sse41)); // PEXTRB
extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], use_sse2)); // PEXTRW, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes
extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // PEXTRD
extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], use_sse41)); // PEXTRQ, only x86_64
for ty in ValueType::all_lane_types() {
if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = extractlane.bind_vector(ty, number_of_lanes);
let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone());
if ty.lane_bits() < 64 {
e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
} else {
// turns out the 64-bit widths have REX/W encodings and only are available on x86_64
e.enc64_isap(instruction, template.rex().w(), isap.clone());
}
}
}
// SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let instruction = bitcast.bind_vector(ty, 16).bind(F64);