Refactor and turn on lowering for extend-add-pairwise

This commit is contained in:
Johnnie Birch
2021-07-29 20:19:38 -07:00
parent e373ddfe1b
commit e519fca61c
8 changed files with 107 additions and 80 deletions

View File

@@ -156,10 +156,8 @@ fn write_testsuite_tests(
let testname = extract_name(path); let testname = extract_name(path);
writeln!(out, "#[test]")?; writeln!(out, "#[test]")?;
if x64_should_panic(testsuite, &testname, strategy) {
writeln!(out, r#"#[should_panic]"#)?;
// Ignore when using QEMU for running tests (limited memory). // Ignore when using QEMU for running tests (limited memory).
} else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) { if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
writeln!(out, "#[ignore]")?; writeln!(out, "#[ignore]")?;
} }
@@ -182,19 +180,6 @@ fn write_testsuite_tests(
Ok(()) Ok(())
} }
/// For x64 backend features that are not supported yet, mark tests as panicking, so
/// they stop "passing" once the features are properly implemented.
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
if !platform_is_x64() || strategy != "Cranelift" {
return false;
}
match (testsuite, testname) {
_ => {}
}
false
}
/// Ignore tests that aren't supported yet. /// Ignore tests that aren't supported yet.
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
match strategy { match strategy {
@@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", _) if cfg!(feature = "old-x86-backend") => return true, ("simd", _) if cfg!(feature = "old-x86-backend") => return true,
// No simd support yet for s390x. // No simd support yet for s390x.
("simd", _) if platform_is_s390x() => return true, ("simd", _) if platform_is_s390x() => return true,
// These are new instructions that are only known to be supported for x64.
("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
if !platform_is_x64() =>
{
return true
}
_ => {} _ => {}
}, },
_ => panic!("unrecognized strategy"), _ => panic!("unrecognized strategy"),

View File

@@ -4114,20 +4114,7 @@ pub(crate) fn define(
Inst::new( Inst::new(
"uwiden_high", "uwiden_high",
r#" r#"
Lane-wise integer extended pairwise addition producing extended results Widen the high lanes of `x` using unsigned extension.
(twice wider results than the input)
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"extended_pairwise_add_signed",
r#"
Widen the high lanes of `x` using signed extension.
This will double the lane width and halve the number of lanes. This will double the lane width and halve the number of lanes.
"#, "#,
@@ -4137,17 +4124,24 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
let x = &Operand::new("x", I8or16or32xN);
let y = &Operand::new("y", I8or16or32xN);
let a = &Operand::new("a", I8or16or32xN);
ig.push( ig.push(
Inst::new( Inst::new(
"extended_pairwise_add_unsigned", "iadd_pairwise",
r#" r#"
Widen the high lanes of `x` extending with zeros. Does lane-wise integer pairwise addition on two operands, putting the
combined results into a single vector result. Here a pair refers to adjacent
This will double the lane width and halve the number of lanes. lanes in a vector, i.e. i*2 + (i*2+1) for each i in 0..num_lanes/2. The first operand
pairwise add results will make up the low half of the resulting vector while
the second operand pairwise add results will make up the upper half of the
resulting vector.
"#, "#,
&formats.unary, &formats.binary,
) )
.operands_in(vec![x]) .operands_in(vec![x, y])
.operands_out(vec![a]), .operands_out(vec![a]),
); );

View File

@@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}); });
} }
Opcode::ExtendedPairwiseAddSigned Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
| Opcode::ExtendedPairwiseAddUnsigned unimplemented!("lowering {}", op)
| Opcode::ConstAddr }
| Opcode::Vconcat
| Opcode::Vsplit => unimplemented!("lowering {}", op),
} }
Ok(()) Ok(())

View File

@@ -2869,8 +2869,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::SqmulRoundSat | Opcode::SqmulRoundSat
| Opcode::FvpromoteLow | Opcode::FvpromoteLow
| Opcode::Fvdemote | Opcode::Fvdemote
| Opcode::ExtendedPairwiseAddSigned | Opcode::IaddPairwise => {
| Opcode::ExtendedPairwiseAddUnsigned => {
// TODO // TODO
unimplemented!("Vector ops not implemented."); unimplemented!("Vector ops not implemented.");
} }

View File

@@ -4927,18 +4927,33 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
} }
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => { Opcode::IaddPairwise => {
// Extended pairwise addition instructions compute extended sums within adjacent if let (Some(swiden_low), Some(swiden_high)) = (
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes. matches_input(ctx, inputs[0], Opcode::SwidenLow),
// Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380 matches_input(ctx, inputs[1], Opcode::SwidenHigh),
/* ) {
let input_ty = ctx.input_ty(insn, 0); let swiden_input = &[
InsnInput {
insn: swiden_low,
input: 0,
},
InsnInput {
insn: swiden_high,
input: 0,
},
];
let input_ty = ctx.input_ty(swiden_low, 0);
let output_ty = ctx.output_ty(insn, 0); let output_ty = ctx.output_ty(insn, 0);
let src = put_input_in_reg(ctx, inputs[0]); let src0 = put_input_in_reg(ctx, swiden_input[0]);
let src1 = put_input_in_reg(ctx, swiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
unreachable!(); if src0 != src1 {
match op { unimplemented!(
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) { "iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => { (types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16]; static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
@@ -4949,7 +4964,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMem::reg(mul_const_reg.to_reg()), RegMem::reg(mul_const_reg.to_reg()),
dst, dst,
)); ));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst)); ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
} }
(types::I16X8, types::I32X4) => { (types::I16X8, types::I32X4) => {
static MUL_CONST: [u8; 16] = [ static MUL_CONST: [u8; 16] = [
@@ -4959,25 +4974,49 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8)); ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddwd, SseOpcode::Pmaddwd,
RegMem::reg(mul_const_reg.to_reg()), RegMem::reg(mul_const_reg.to_reg()),
dst, dst,
)); ));
} }
_ => unreachable!( _ => {
"Type pattern not supported {:?}-{:?} not supported for {:?}.", unimplemented!("Type not supported for {:?}", op);
input_ty, output_ty, op }
), }
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
matches_input(ctx, inputs[0], Opcode::UwidenLow),
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
) {
let uwiden_input = &[
InsnInput {
insn: uwiden_low,
input: 0,
}, },
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) { InsnInput {
insn: uwiden_high,
input: 0,
},
];
let input_ty = ctx.input_ty(uwiden_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if src0 != src1 {
unimplemented!(
"iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => { (types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16]; static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16)); ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddubsw, SseOpcode::Pmaddubsw,
RegMem::reg(mul_const_reg.to_reg()), RegMem::reg(mul_const_reg.to_reg()),
@@ -4997,7 +5036,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
pxor_const_reg, pxor_const_reg,
types::I16X8, types::I16X8,
)); ));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor, SseOpcode::Pxor,
RegMem::reg(pxor_const_reg.to_reg()), RegMem::reg(pxor_const_reg.to_reg()),
@@ -5021,7 +5060,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMem::reg(madd_const_reg.to_reg()), RegMem::reg(madd_const_reg.to_reg()),
dst, dst,
)); ));
static ADDD_CONST2: [u8; 16] = [ static ADDD_CONST2: [u8; 16] = [
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
@@ -5040,14 +5078,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
)); ));
} }
_ => unreachable!( _ => {
"Type pattern not supported {:?}-{:?} not supported for {:?}.", unimplemented!("Type not supported for {:?}", op);
input_ty, output_ty, op }
), }
}, } else {
_ => unreachable!("{:?} not supported.", op), unimplemented!("Operands not supported for {:?}", op);
} }
*/
} }
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => { Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
let input_ty = ctx.input_ty(insn, 0); let input_ty = ctx.input_ty(insn, 0);

View File

@@ -630,8 +630,7 @@ where
Opcode::Fence => unimplemented!("Fence"), Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"), Opcode::IaddPairwise => unimplemented!("IaddPairwise"),
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
// TODO: these instructions should be removed once the new backend makes these obsolete // TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

View File

@@ -1881,19 +1881,27 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
} }
Operator::I16x8ExtAddPairwiseI8x16S => { Operator::I16x8ExtAddPairwiseI8x16S => {
let a = pop1_with_bitcast(state, I8X16, builder); let a = pop1_with_bitcast(state, I8X16, builder);
state.push1(builder.ins().extended_pairwise_add_signed(a)) let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
} }
Operator::I32x4ExtAddPairwiseI16x8S => { Operator::I32x4ExtAddPairwiseI16x8S => {
let a = pop1_with_bitcast(state, I16X8, builder); let a = pop1_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().extended_pairwise_add_signed(a)) let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
} }
Operator::I16x8ExtAddPairwiseI8x16U => { Operator::I16x8ExtAddPairwiseI8x16U => {
let a = pop1_with_bitcast(state, I8X16, builder); let a = pop1_with_bitcast(state, I8X16, builder);
state.push1(builder.ins().extended_pairwise_add_unsigned(a)) let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
} }
Operator::I32x4ExtAddPairwiseI16x8U => { Operator::I32x4ExtAddPairwiseI16x8U => {
let a = pop1_with_bitcast(state, I16X8, builder); let a = pop1_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().extended_pairwise_add_unsigned(a)) let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
} }
Operator::F32x4Ceil | Operator::F64x2Ceil => { Operator::F32x4Ceil | Operator::F64x2Ceil => {
// This is something of a misuse of `type_of`, because that produces the return type // This is something of a misuse of `type_of`, because that produces the return type