diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 20f87ac265..de78c3b3b7 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -407,13 +407,18 @@ fn define_simd(
     let umax = insts.by_name("umax");
     let umin = insts.by_name("umin");
     let snarrow = insts.by_name("snarrow");
+    let swiden_high = insts.by_name("swiden_high");
+    let swiden_low = insts.by_name("swiden_low");
     let ushr_imm = insts.by_name("ushr_imm");
     let ushr = insts.by_name("ushr");
+    let uwiden_high = insts.by_name("uwiden_high");
+    let uwiden_low = insts.by_name("uwiden_low");
     let vconst = insts.by_name("vconst");
     let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
     let vselect = insts.by_name("vselect");
 
+    let x86_palignr = x86_instructions.by_name("x86_palignr");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
     let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
     let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -786,6 +791,26 @@ fn define_simd(
         );
     }
 
+    // SIMD widen: rewrite `*widen_high(a)` as `*widen_low(x86_palignr(a, a, uimm8_eight))`.
+    for ty in &[I8, I16] {
+        let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = swiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = swiden_low(c)),
+            ],
+        );
+        let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = uwiden_high(a)),
+            vec![
+                def!(c = x86_palignr(a, a, uimm8_eight)),
+                def!(b = uwiden_low(c)),
+            ],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif
index ccea16de2c..0115107810 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif
@@ -52,3 +52,19 @@ block0(v0:f32x4):
     ; nextln: v1 = iadd v12, v11
     return v1
 }
+
+function %uwiden_high(i8x16) -> i16x8 {
+block0(v0: i8x16):
+    v1 = uwiden_high v0
+    ; check: v2 = x86_palignr v0, v0, 8
+    ; nextln: v1 = uwiden_low v2
+    return v1
+}
+
+function %swiden_high(i16x8) -> i32x4 {
+block0(v0: i16x8):
+    v1 = swiden_high v0
+    ; check: v2 = x86_palignr v0, v0, 8
+    ; nextln: v1 = swiden_low v2
+    return v1
+}