Refactor and turn on lowering for extend-add-pairwise
This commit is contained in:
24
build.rs
24
build.rs
@@ -156,10 +156,8 @@ fn write_testsuite_tests(
|
|||||||
let testname = extract_name(path);
|
let testname = extract_name(path);
|
||||||
|
|
||||||
writeln!(out, "#[test]")?;
|
writeln!(out, "#[test]")?;
|
||||||
if x64_should_panic(testsuite, &testname, strategy) {
|
|
||||||
writeln!(out, r#"#[should_panic]"#)?;
|
|
||||||
// Ignore when using QEMU for running tests (limited memory).
|
// Ignore when using QEMU for running tests (limited memory).
|
||||||
} else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
|
if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
|
||||||
writeln!(out, "#[ignore]")?;
|
writeln!(out, "#[ignore]")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -182,19 +180,6 @@ fn write_testsuite_tests(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// For x64 backend features that are not supported yet, mark tests as panicking, so
|
|
||||||
/// they stop "passing" once the features are properly implemented.
|
|
||||||
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|
||||||
if !platform_is_x64() || strategy != "Cranelift" {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
match (testsuite, testname) {
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Ignore tests that aren't supported yet.
|
/// Ignore tests that aren't supported yet.
|
||||||
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||||
match strategy {
|
match strategy {
|
||||||
@@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
|
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
|
||||||
// No simd support yet for s390x.
|
// No simd support yet for s390x.
|
||||||
("simd", _) if platform_is_s390x() => return true,
|
("simd", _) if platform_is_s390x() => return true,
|
||||||
|
// These are new instructions that are only known to be supported for x64.
|
||||||
|
("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
||||||
|
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
|
||||||
|
if !platform_is_x64() =>
|
||||||
|
{
|
||||||
|
return true
|
||||||
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
_ => panic!("unrecognized strategy"),
|
_ => panic!("unrecognized strategy"),
|
||||||
|
|||||||
@@ -4114,8 +4114,9 @@ pub(crate) fn define(
|
|||||||
Inst::new(
|
Inst::new(
|
||||||
"uwiden_high",
|
"uwiden_high",
|
||||||
r#"
|
r#"
|
||||||
Lane-wise integer extended pairwise addition producing extended results
|
Widen the high lanes of `x` using unsigned extension.
|
||||||
(twice wider results than the input)
|
|
||||||
|
This will double the lane width and halve the number of lanes.
|
||||||
"#,
|
"#,
|
||||||
&formats.unary,
|
&formats.unary,
|
||||||
)
|
)
|
||||||
@@ -4123,31 +4124,24 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
ig.push(
|
let x = &Operand::new("x", I8or16or32xN);
|
||||||
Inst::new(
|
let y = &Operand::new("y", I8or16or32xN);
|
||||||
"extended_pairwise_add_signed",
|
let a = &Operand::new("a", I8or16or32xN);
|
||||||
r#"
|
|
||||||
Widen the high lanes of `x` using signed extension.
|
|
||||||
|
|
||||||
This will double the lane width and halve the number of lanes.
|
|
||||||
"#,
|
|
||||||
&formats.unary,
|
|
||||||
)
|
|
||||||
.operands_in(vec![x])
|
|
||||||
.operands_out(vec![a]),
|
|
||||||
);
|
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"extended_pairwise_add_unsigned",
|
"iadd_pairwise",
|
||||||
r#"
|
r#"
|
||||||
Widen the high lanes of `x` extending with zeros.
|
Does lane-wise integer pairwise addition on two operands, putting the
|
||||||
|
combined results into a single vector result. Here a pair refers to adjacent
|
||||||
This will double the lane width and halve the number of lanes.
|
lanes in a vector, i.e. lane i*2 + lane (i*2+1) for each i in 0..num_lanes/2. The first operand
|
||||||
|
pairwise add results will make up the low half of the resulting vector while
|
||||||
|
the second operand pairwise add results will make up the upper half of the
|
||||||
|
resulting vector.
|
||||||
"#,
|
"#,
|
||||||
&formats.unary,
|
&formats.binary,
|
||||||
)
|
)
|
||||||
.operands_in(vec![x])
|
.operands_in(vec![x, y])
|
||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::ExtendedPairwiseAddSigned
|
Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
|
||||||
| Opcode::ExtendedPairwiseAddUnsigned
|
unimplemented!("lowering {}", op)
|
||||||
| Opcode::ConstAddr
|
}
|
||||||
| Opcode::Vconcat
|
|
||||||
| Opcode::Vsplit => unimplemented!("lowering {}", op),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -2869,8 +2869,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::SqmulRoundSat
|
| Opcode::SqmulRoundSat
|
||||||
| Opcode::FvpromoteLow
|
| Opcode::FvpromoteLow
|
||||||
| Opcode::Fvdemote
|
| Opcode::Fvdemote
|
||||||
| Opcode::ExtendedPairwiseAddSigned
|
| Opcode::IaddPairwise => {
|
||||||
| Opcode::ExtendedPairwiseAddUnsigned => {
|
|
||||||
// TODO
|
// TODO
|
||||||
unimplemented!("Vector ops not implemented.");
|
unimplemented!("Vector ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4927,18 +4927,33 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
|
Opcode::IaddPairwise => {
|
||||||
// Extended pairwise addition instructions compute extended sums within adjacent
|
if let (Some(swiden_low), Some(swiden_high)) = (
|
||||||
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
|
matches_input(ctx, inputs[0], Opcode::SwidenLow),
|
||||||
// Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380
|
matches_input(ctx, inputs[1], Opcode::SwidenHigh),
|
||||||
/*
|
) {
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
let swiden_input = &[
|
||||||
let output_ty = ctx.output_ty(insn, 0);
|
InsnInput {
|
||||||
let src = put_input_in_reg(ctx, inputs[0]);
|
insn: swiden_low,
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
input: 0,
|
||||||
unreachable!();
|
},
|
||||||
match op {
|
InsnInput {
|
||||||
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
|
insn: swiden_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let input_ty = ctx.input_ty(swiden_low, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let src0 = put_input_in_reg(ctx, swiden_input[0]);
|
||||||
|
let src1 = put_input_in_reg(ctx, swiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
if src0 != src1 {
|
||||||
|
unimplemented!(
|
||||||
|
"iadd_pairwise not implemented for general case with different inputs"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
match (input_ty, output_ty) {
|
||||||
(types::I8X16, types::I16X8) => {
|
(types::I8X16, types::I16X8) => {
|
||||||
static MUL_CONST: [u8; 16] = [0x01; 16];
|
static MUL_CONST: [u8; 16] = [0x01; 16];
|
||||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||||
@@ -4949,7 +4964,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
RegMem::reg(mul_const_reg.to_reg()),
|
RegMem::reg(mul_const_reg.to_reg()),
|
||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
|
||||||
}
|
}
|
||||||
(types::I16X8, types::I32X4) => {
|
(types::I16X8, types::I32X4) => {
|
||||||
static MUL_CONST: [u8; 16] = [
|
static MUL_CONST: [u8; 16] = [
|
||||||
@@ -4959,25 +4974,49 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||||
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||||
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
|
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
|
||||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pmaddwd,
|
SseOpcode::Pmaddwd,
|
||||||
RegMem::reg(mul_const_reg.to_reg()),
|
RegMem::reg(mul_const_reg.to_reg()),
|
||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
_ => unreachable!(
|
_ => {
|
||||||
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
|
unimplemented!("Type not supported for {:?}", op);
|
||||||
input_ty, output_ty, op
|
}
|
||||||
),
|
}
|
||||||
},
|
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
|
||||||
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
|
matches_input(ctx, inputs[0], Opcode::UwidenLow),
|
||||||
|
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
|
||||||
|
) {
|
||||||
|
let uwiden_input = &[
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden_low,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
InsnInput {
|
||||||
|
insn: uwiden_high,
|
||||||
|
input: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let input_ty = ctx.input_ty(uwiden_low, 0);
|
||||||
|
let output_ty = ctx.output_ty(insn, 0);
|
||||||
|
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
|
||||||
|
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
if src0 != src1 {
|
||||||
|
unimplemented!(
|
||||||
|
"iadd_pairwise not implemented for general case with different inputs"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
match (input_ty, output_ty) {
|
||||||
(types::I8X16, types::I16X8) => {
|
(types::I8X16, types::I16X8) => {
|
||||||
static MUL_CONST: [u8; 16] = [0x01; 16];
|
static MUL_CONST: [u8; 16] = [0x01; 16];
|
||||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||||
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||||
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
|
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
|
||||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pmaddubsw,
|
SseOpcode::Pmaddubsw,
|
||||||
RegMem::reg(mul_const_reg.to_reg()),
|
RegMem::reg(mul_const_reg.to_reg()),
|
||||||
@@ -4997,7 +5036,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
pxor_const_reg,
|
pxor_const_reg,
|
||||||
types::I16X8,
|
types::I16X8,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
SseOpcode::Pxor,
|
SseOpcode::Pxor,
|
||||||
RegMem::reg(pxor_const_reg.to_reg()),
|
RegMem::reg(pxor_const_reg.to_reg()),
|
||||||
@@ -5021,7 +5060,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
RegMem::reg(madd_const_reg.to_reg()),
|
RegMem::reg(madd_const_reg.to_reg()),
|
||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
|
|
||||||
static ADDD_CONST2: [u8; 16] = [
|
static ADDD_CONST2: [u8; 16] = [
|
||||||
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
|
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
|
||||||
0x00, 0x00, 0x01, 0x00,
|
0x00, 0x00, 0x01, 0x00,
|
||||||
@@ -5040,14 +5078,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
dst,
|
dst,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
_ => unreachable!(
|
_ => {
|
||||||
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
|
unimplemented!("Type not supported for {:?}", op);
|
||||||
input_ty, output_ty, op
|
}
|
||||||
),
|
}
|
||||||
},
|
} else {
|
||||||
_ => unreachable!("{:?} not supported.", op),
|
unimplemented!("Operands not supported for {:?}", op);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
|
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
|||||||
Binary file not shown.
@@ -630,8 +630,7 @@ where
|
|||||||
Opcode::Fence => unimplemented!("Fence"),
|
Opcode::Fence => unimplemented!("Fence"),
|
||||||
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
||||||
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
||||||
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
|
Opcode::IaddPairwise => unimplemented!("IaddPairwise"),
|
||||||
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
|
|
||||||
|
|
||||||
// TODO: these instructions should be removed once the new backend makes these obsolete
|
// TODO: these instructions should be removed once the new backend makes these obsolete
|
||||||
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
|
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
|
||||||
|
|||||||
@@ -1881,19 +1881,27 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
}
|
}
|
||||||
Operator::I16x8ExtAddPairwiseI8x16S => {
|
Operator::I16x8ExtAddPairwiseI8x16S => {
|
||||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||||
state.push1(builder.ins().extended_pairwise_add_signed(a))
|
let widen_low = builder.ins().swiden_low(a);
|
||||||
|
let widen_high = builder.ins().swiden_high(a);
|
||||||
|
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
|
||||||
}
|
}
|
||||||
Operator::I32x4ExtAddPairwiseI16x8S => {
|
Operator::I32x4ExtAddPairwiseI16x8S => {
|
||||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||||
state.push1(builder.ins().extended_pairwise_add_signed(a))
|
let widen_low = builder.ins().swiden_low(a);
|
||||||
|
let widen_high = builder.ins().swiden_high(a);
|
||||||
|
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
|
||||||
}
|
}
|
||||||
Operator::I16x8ExtAddPairwiseI8x16U => {
|
Operator::I16x8ExtAddPairwiseI8x16U => {
|
||||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||||
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
|
let widen_low = builder.ins().uwiden_low(a);
|
||||||
|
let widen_high = builder.ins().uwiden_high(a);
|
||||||
|
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
|
||||||
}
|
}
|
||||||
Operator::I32x4ExtAddPairwiseI16x8U => {
|
Operator::I32x4ExtAddPairwiseI16x8U => {
|
||||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||||
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
|
let widen_low = builder.ins().uwiden_low(a);
|
||||||
|
let widen_high = builder.ins().uwiden_high(a);
|
||||||
|
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
|
||||||
}
|
}
|
||||||
Operator::F32x4Ceil | Operator::F64x2Ceil => {
|
Operator::F32x4Ceil | Operator::F64x2Ceil => {
|
||||||
// This is something of a misuse of `type_of`, because that produces the return type
|
// This is something of a misuse of `type_of`, because that produces the return type
|
||||||
|
|||||||
Reference in New Issue
Block a user