Legalize [u|s]widen_high for x86

Use `x86_palignr` followed by `[u|s]widen_low` to legalize these instructions.
This commit is contained in:
Andrew Brown
2020-07-07 16:22:26 -07:00
parent c8ddf8a34c
commit f0b083c6ad
2 changed files with 41 additions and 0 deletions

View File

@@ -407,13 +407,18 @@ fn define_simd(
let umax = insts.by_name("umax");
let umin = insts.by_name("umin");
let snarrow = insts.by_name("snarrow");
let swiden_high = insts.by_name("swiden_high");
let swiden_low = insts.by_name("swiden_low");
let ushr_imm = insts.by_name("ushr_imm");
let ushr = insts.by_name("ushr");
let uwiden_high = insts.by_name("uwiden_high");
let uwiden_low = insts.by_name("uwiden_low");
let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true");
let vselect = insts.by_name("vselect");
let x86_palignr = x86_instructions.by_name("x86_palignr");
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -786,6 +791,26 @@ fn define_simd(
);
}
// SIMD widen
//
// Legalize `swiden_high` and `uwiden_high` in terms of their `*_low` counterparts, once for each
// lane type in `[I8, I16]` at the SSE vector size. Every pattern rewrites
// `b = [u|s]widen_high(a)` as `c = x86_palignr(a, a, uimm8_eight)` followed by
// `b = [u|s]widen_low(c)`.
//
// NOTE(review): `uimm8_eight` is presumably the immediate 8, so that `x86_palignr` moves the upper
// 8 bytes of `a` into the lower half where the `*_low` instruction reads them -- confirm against
// the definitions of `uimm8_eight` and `x86_palignr`.
for ty in &[I8, I16] {
// Signed variant: shadow the unbound instruction with one bound to this vector type.
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = swiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = swiden_low(c)),
],
);
// Unsigned variant: same expansion, ending in `uwiden_low` instead of `swiden_low`.
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = uwiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = uwiden_low(c)),
],
);
}
narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");

View File

@@ -52,3 +52,19 @@ block0(v0:f32x4):
; nextln: v1 = iadd v12, v11
return v1
}
; Verifies the legalization of `uwiden_high` on an i8x16 input: the expected output is an
; `x86_palignr` of the input with itself and immediate 8, directly followed by `uwiden_low`.
function %uwiden_high(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = uwiden_high v0
; check: v2 = x86_palignr v0, v0, 8
; nextln: v1 = uwiden_low v2
return v1
}
; Verifies the legalization of `swiden_high` on an i16x8 input: the expected output is an
; `x86_palignr` of the input with itself and immediate 8, directly followed by `swiden_low`.
function %swiden_high(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = swiden_high v0
; check: v2 = x86_palignr v0, v0, 8
; nextln: v1 = swiden_low v2
return v1
}