Replace x86_packss with snarrow
The Wasm SIMD specification defines integer narrowing instructions (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#integer-to-integer-narrowing) that lower to PACKSS* on x86. A target-independent `snarrow` instruction therefore covers this use, and the x86-specific `x86_packss` instruction is no longer necessary in the CLIF IR.
This commit is contained in:
@@ -1676,6 +1676,7 @@ fn define_simd(
|
|||||||
let uload16x4_complex = shared.by_name("uload16x4_complex");
|
let uload16x4_complex = shared.by_name("uload16x4_complex");
|
||||||
let uload32x2 = shared.by_name("uload32x2");
|
let uload32x2 = shared.by_name("uload32x2");
|
||||||
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||||
|
let snarrow = shared.by_name("snarrow");
|
||||||
let ushr_imm = shared.by_name("ushr_imm");
|
let ushr_imm = shared.by_name("ushr_imm");
|
||||||
let usub_sat = shared.by_name("usub_sat");
|
let usub_sat = shared.by_name("usub_sat");
|
||||||
let vconst = shared.by_name("vconst");
|
let vconst = shared.by_name("vconst");
|
||||||
@@ -1686,7 +1687,6 @@ fn define_simd(
|
|||||||
let x86_fmin = x86.by_name("x86_fmin");
|
let x86_fmin = x86.by_name("x86_fmin");
|
||||||
let x86_movlhps = x86.by_name("x86_movlhps");
|
let x86_movlhps = x86.by_name("x86_movlhps");
|
||||||
let x86_movsd = x86.by_name("x86_movsd");
|
let x86_movsd = x86.by_name("x86_movsd");
|
||||||
let x86_packss = x86.by_name("x86_packss");
|
|
||||||
let x86_pblendw = x86.by_name("x86_pblendw");
|
let x86_pblendw = x86.by_name("x86_pblendw");
|
||||||
let x86_pextr = x86.by_name("x86_pextr");
|
let x86_pextr = x86.by_name("x86_pextr");
|
||||||
let x86_pinsr = x86.by_name("x86_pinsr");
|
let x86_pinsr = x86.by_name("x86_pinsr");
|
||||||
@@ -1901,8 +1901,8 @@ fn define_simd(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
|
for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
|
||||||
let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
|
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
|
||||||
e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
|
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
||||||
|
|||||||
@@ -454,35 +454,6 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
let I16xN = &TypeVar::new(
|
|
||||||
"I16xN",
|
|
||||||
"A SIMD vector type containing integers 16-bits wide and up",
|
|
||||||
TypeSetBuilder::new()
|
|
||||||
.ints(16..32)
|
|
||||||
.simd_lanes(4..8)
|
|
||||||
.includes_scalars(false)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let x = &Operand::new("x", I16xN);
|
|
||||||
let y = &Operand::new("y", I16xN);
|
|
||||||
let a = &Operand::new("a", &I16xN.split_lanes());
|
|
||||||
|
|
||||||
ig.push(
|
|
||||||
Inst::new(
|
|
||||||
"x86_packss",
|
|
||||||
r#"
|
|
||||||
Convert packed signed integers the lanes of ``x`` and ``y`` into half-width integers, using
|
|
||||||
signed saturation to handle overflows. For example, with notional i16x2 vectors, where
|
|
||||||
``x = [x1, x0]`` and ``y = [y1, y0]``, this operation would result in
|
|
||||||
``a = [y1', y0', x1', x0']`` (using the Intel manual's right-to-left lane ordering).
|
|
||||||
"#,
|
|
||||||
&formats.binary,
|
|
||||||
)
|
|
||||||
.operands_in(vec![x, y])
|
|
||||||
.operands_out(vec![a]),
|
|
||||||
);
|
|
||||||
|
|
||||||
let x = &Operand::new("x", FxN);
|
let x = &Operand::new("x", FxN);
|
||||||
let y = &Operand::new("y", FxN);
|
let y = &Operand::new("y", FxN);
|
||||||
let a = &Operand::new("a", FxN);
|
let a = &Operand::new("a", FxN);
|
||||||
|
|||||||
@@ -405,6 +405,7 @@ fn define_simd(
|
|||||||
let uadd_sat = insts.by_name("uadd_sat");
|
let uadd_sat = insts.by_name("uadd_sat");
|
||||||
let umax = insts.by_name("umax");
|
let umax = insts.by_name("umax");
|
||||||
let umin = insts.by_name("umin");
|
let umin = insts.by_name("umin");
|
||||||
|
let snarrow = insts.by_name("snarrow");
|
||||||
let ushr_imm = insts.by_name("ushr_imm");
|
let ushr_imm = insts.by_name("ushr_imm");
|
||||||
let ushr = insts.by_name("ushr");
|
let ushr = insts.by_name("ushr");
|
||||||
let vconst = insts.by_name("vconst");
|
let vconst = insts.by_name("vconst");
|
||||||
@@ -412,7 +413,6 @@ fn define_simd(
|
|||||||
let vany_true = insts.by_name("vany_true");
|
let vany_true = insts.by_name("vany_true");
|
||||||
let vselect = insts.by_name("vselect");
|
let vselect = insts.by_name("vselect");
|
||||||
|
|
||||||
let x86_packss = x86_instructions.by_name("x86_packss");
|
|
||||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||||
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
||||||
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
||||||
@@ -575,7 +575,7 @@ fn define_simd(
|
|||||||
def!(g = raw_bitcast_i16x8_again(f)),
|
def!(g = raw_bitcast_i16x8_again(f)),
|
||||||
def!(h = x86_psra(g, b)),
|
def!(h = x86_psra(g, b)),
|
||||||
// Re-pack the vector.
|
// Re-pack the vector.
|
||||||
def!(z = x86_packss(e, h)),
|
def!(z = snarrow(e, h)),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3883,6 +3883,37 @@ pub(crate) fn define(
|
|||||||
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
|
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let I16xN = &TypeVar::new(
|
||||||
|
"I16xN",
|
||||||
|
"A SIMD vector type containing integers 16-bits wide and up",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.ints(16..32)
|
||||||
|
.simd_lanes(4..8)
|
||||||
|
.includes_scalars(false)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", I16xN);
|
||||||
|
let y = &Operand::new("y", I16xN);
|
||||||
|
let a = &Operand::new("a", &I16xN.split_lanes());
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"snarrow",
|
||||||
|
r#"
|
||||||
|
Combine `x` and `y` into a vector with twice the lanes but half the integer width while
|
||||||
|
saturating overflowing values to the signed maximum and minimum.
|
||||||
|
|
||||||
|
The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
|
||||||
|
and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
|
||||||
|
returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let IntTo = &TypeVar::new(
|
let IntTo = &TypeVar::new(
|
||||||
"IntTo",
|
"IntTo",
|
||||||
"A larger integer type with the same number of lanes",
|
"A larger integer type with the same number of lanes",
|
||||||
|
|||||||
@@ -2060,7 +2060,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::X86Pminu
|
| Opcode::X86Pminu
|
||||||
| Opcode::X86Pmullq
|
| Opcode::X86Pmullq
|
||||||
| Opcode::X86Pmuludq
|
| Opcode::X86Pmuludq
|
||||||
| Opcode::X86Packss
|
|
||||||
| Opcode::X86Punpckh
|
| Opcode::X86Punpckh
|
||||||
| Opcode::X86Punpckl
|
| Opcode::X86Punpckl
|
||||||
| Opcode::X86Vcvtudq2ps
|
| Opcode::X86Vcvtudq2ps
|
||||||
@@ -2069,8 +2068,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Iabs => unimplemented!(),
|
|
||||||
Opcode::AvgRound => unimplemented!(),
|
Opcode::AvgRound => unimplemented!(),
|
||||||
|
Opcode::Iabs => unimplemented!(),
|
||||||
|
Opcode::Snarrow => unimplemented!(),
|
||||||
Opcode::TlsValue => unimplemented!(),
|
Opcode::TlsValue => unimplemented!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ block0:
|
|||||||
; nextln: v9 = raw_bitcast.i16x8 v8
|
; nextln: v9 = raw_bitcast.i16x8 v8
|
||||||
; nextln: v10 = x86_psra v9, v4
|
; nextln: v10 = x86_psra v9, v4
|
||||||
|
|
||||||
; nextln: v2 = x86_packss v7, v10
|
; nextln: v2 = snarrow v7, v10
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -118,8 +118,8 @@ block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
function %packss_i16x8(i16x8, i16x8) {
|
function %snarrow_i16x8(i16x8, i16x8) {
|
||||||
block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
|
block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
|
||||||
[-, %xmm7] v2 = x86_packss v0, v1 ; bin: 66 41 0f 63 f8
|
[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -206,15 +206,9 @@ block0:
|
|||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
function %pack() -> b1 {
|
function %snarrow(i32x4, i32x4) -> i16x8 {
|
||||||
block0:
|
block0(v0: i32x4, v1: i32x4):
|
||||||
v0 = vconst.i32x4 [0 1 -1 0x0001ffff]
|
v2 = snarrow v0, v1
|
||||||
v1 = vconst.i32x4 [4 5 -6 0xffffffff]
|
return v2
|
||||||
v2 = x86_packss v0, v1
|
|
||||||
|
|
||||||
v3 = vconst.i16x8 [0 1 -1 0x7fff 4 5 -6 0xffff]
|
|
||||||
v4 = icmp eq v2, v3
|
|
||||||
v5 = vall_true v4
|
|
||||||
return v5
|
|
||||||
}
|
}
|
||||||
; run
|
; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
|
||||||
|
|||||||
Reference in New Issue
Block a user