Add x86 SIMD sshr and ushr

Only the shifts with applicable SSE2 instructions are implemented here: PSRL* (for ushr) only has 16-64 bit instructions and PSRA* (for sshr) only has 16-32 bit instructions.
This commit is contained in:
Andrew Brown
2019-10-07 10:38:35 -07:00
parent 808885ce56
commit f1904bffea
7 changed files with 197 additions and 2 deletions

View File

@@ -52,10 +52,12 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let splat = insts.by_name("splat");
let shuffle = insts.by_name("shuffle");
let srem = insts.by_name("srem");
let sshr = insts.by_name("sshr");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
let ushr = insts.by_name("ushr");
let vconst = insts.by_name("vconst");
let x86_bsf = x86_instructions.by_name("x86_bsf");
@@ -63,6 +65,8 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let x86_pshufb = x86_instructions.by_name("x86_pshufb");
let x86_pshufd = x86_instructions.by_name("x86_pshufd");
let x86_psll = x86_instructions.by_name("x86_psll");
let x86_psra = x86_instructions.by_name("x86_psra");
let x86_psrl = x86_instructions.by_name("x86_psrl");
let x86_umulx = x86_instructions.by_name("x86_umulx");
let x86_smulx = x86_instructions.by_name("x86_smulx");
@@ -397,7 +401,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
}
// SIMD shift left
// SIMD shift left (logical)
for ty in &[I16, I32, I64] {
let ishl = ishl.bind(vector(*ty, sse_vector_size));
let bitcast = bitcast.bind(vector(I64, sse_vector_size));
@@ -407,6 +411,26 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
);
}
// SIMD shift right (logical)
for ty in &[I16, I32, I64] {
let ushr = ushr.bind(vector(*ty, sse_vector_size));
let bitcast = bitcast.bind(vector(I64, sse_vector_size));
narrow.legalize(
def!(a = ushr(x, y)),
vec![def!(b = bitcast(y)), def!(a = x86_psrl(x, b))],
);
}
// SIMD shift left (arithmetic)
for ty in &[I16, I32, I64] {
let sshr = sshr.bind(vector(*ty, sse_vector_size));
let bitcast = bitcast.bind(vector(I64, sse_vector_size));
narrow.legalize(
def!(a = sshr(x, y)),
vec![def!(b = bitcast(y)), def!(a = x86_psra(x, b))],
);
}
narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");