Merge pull request #3035 from akirilov-arm/simd_i16x8_q15mulr_sat_s
Enable the simd_i16x8_q15mulr_sat_s test on AArch64
This commit is contained in:
1
build.rs
1
build.rs
@@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
("simd", "simd_conversions")
|
("simd", "simd_conversions")
|
||||||
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
||||||
| ("simd", "simd_i16x8_extmul_i8x16")
|
| ("simd", "simd_i16x8_extmul_i8x16")
|
||||||
| ("simd", "simd_i16x8_q15mulr_sat_s")
|
|
||||||
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
|
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
|
||||||
| ("simd", "simd_i32x4_extmul_i16x8")
|
| ("simd", "simd_i32x4_extmul_i16x8")
|
||||||
| ("simd", "simd_i32x4_trunc_sat_f64x2")
|
| ("simd", "simd_i32x4_trunc_sat_f64x2")
|
||||||
|
|||||||
@@ -2479,6 +2479,33 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let I16or32 = &TypeVar::new(
|
||||||
|
"I16or32",
|
||||||
|
"A scalar or vector integer type with 16- or 32-bit numbers",
|
||||||
|
TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let qx = &Operand::new("x", I16or32);
|
||||||
|
let qy = &Operand::new("y", I16or32);
|
||||||
|
let qa = &Operand::new("a", I16or32);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"sqmul_round_sat",
|
||||||
|
r#"
|
||||||
|
Fixed-point multiplication of numbers in the QN format, where N + 1
|
||||||
|
is the number bitwidth:
|
||||||
|
`a := signed_saturate((x * y + (1 << (N - 1))) >> N)`
|
||||||
|
|
||||||
|
Polymorphic over all integer types (scalar and vector) with 16- or
|
||||||
|
32-bit numbers.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![qx, qy])
|
||||||
|
.operands_out(vec![qa]),
|
||||||
|
);
|
||||||
|
|
||||||
ig.push(
|
ig.push(
|
||||||
Inst::new(
|
Inst::new(
|
||||||
"udiv",
|
"udiv",
|
||||||
|
|||||||
@@ -2228,6 +2228,14 @@ impl MachInstEmit for Inst {
|
|||||||
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
|
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
|
||||||
VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
|
VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
|
||||||
VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
|
VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
|
||||||
|
VecALUOp::Sqrdmulh => {
|
||||||
|
debug_assert!(
|
||||||
|
size.lane_size() == ScalarSize::Size16
|
||||||
|
|| size.lane_size() == ScalarSize::Size32
|
||||||
|
);
|
||||||
|
|
||||||
|
(0b001_01110_00_1 | enc_size << 1, 0b101101)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let top11 = match alu_op {
|
let top11 = match alu_op {
|
||||||
VecALUOp::Smull | VecALUOp::Smull2 => top11,
|
VecALUOp::Smull | VecALUOp::Smull2 => top11,
|
||||||
|
|||||||
@@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() {
|
|||||||
"smull2 v8.2d, v12.4s, v14.4s",
|
"smull2 v8.2d, v12.4s, v14.4s",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqrdmulh,
|
||||||
|
rd: writable_vreg(31),
|
||||||
|
rn: vreg(0),
|
||||||
|
rm: vreg(31),
|
||||||
|
size: VectorSize::Size16x8,
|
||||||
|
},
|
||||||
|
"1FB47F6E",
|
||||||
|
"sqrdmulh v31.8h, v0.8h, v31.8h",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqrdmulh,
|
||||||
|
rd: writable_vreg(7),
|
||||||
|
rn: vreg(7),
|
||||||
|
rm: vreg(23),
|
||||||
|
size: VectorSize::Size32x2,
|
||||||
|
},
|
||||||
|
"E7B4B72E",
|
||||||
|
"sqrdmulh v7.2s, v7.2s, v23.2s",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecMisc {
|
Inst::VecMisc {
|
||||||
op: VecMisc2::Not,
|
op: VecMisc2::Not,
|
||||||
|
|||||||
@@ -311,6 +311,8 @@ pub enum VecALUOp {
|
|||||||
Smull,
|
Smull,
|
||||||
/// Signed multiply long (high halves)
|
/// Signed multiply long (high halves)
|
||||||
Smull2,
|
Smull2,
|
||||||
|
/// Signed saturating rounding doubling multiply returning high half
|
||||||
|
Sqrdmulh,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A Vector miscellaneous operation with two registers.
|
/// A Vector miscellaneous operation with two registers.
|
||||||
@@ -3980,6 +3982,7 @@ impl Inst {
|
|||||||
VecALUOp::Zip1 => ("zip1", size),
|
VecALUOp::Zip1 => ("zip1", size),
|
||||||
VecALUOp::Smull => ("smull", size),
|
VecALUOp::Smull => ("smull", size),
|
||||||
VecALUOp::Smull2 => ("smull2", size),
|
VecALUOp::Smull2 => ("smull2", size),
|
||||||
|
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
|
||||||
};
|
};
|
||||||
let rd_size = match alu_op {
|
let rd_size = match alu_op {
|
||||||
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
|
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
|
||||||
|
|||||||
@@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
panic!("table_addr should have been removed by legalization!");
|
panic!("table_addr should have been removed by legalization!");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::ConstAddr => unimplemented!(),
|
|
||||||
|
|
||||||
Opcode::Nop => {
|
Opcode::Nop => {
|
||||||
// Nothing.
|
// Nothing.
|
||||||
}
|
}
|
||||||
@@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Vsplit | Opcode::Vconcat => {
|
|
||||||
// TODO
|
|
||||||
panic!("Vector ops not implemented.");
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Isplit => {
|
Opcode::Isplit => {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
ctx.input_ty(insn, 0),
|
ctx.input_ty(insn, 0),
|
||||||
@@ -3524,9 +3517,35 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
|
Opcode::SqmulRoundSat => {
|
||||||
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
|
let ty = ty.unwrap();
|
||||||
Opcode::Fvdemote => unimplemented!("Fvdemote"),
|
|
||||||
|
if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
|
||||||
|
return Err(CodegenError::Unsupported(format!(
|
||||||
|
"Unsupported type: {:?}",
|
||||||
|
ty
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
|
||||||
|
ctx.emit(Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqrdmulh,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
size: VectorSize::from_ty(ty),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Opcode::ConstAddr
|
||||||
|
| Opcode::FcvtLowFromSint
|
||||||
|
| Opcode::Fvdemote
|
||||||
|
| Opcode::FvpromoteLow
|
||||||
|
| Opcode::Vconcat
|
||||||
|
| Opcode::Vsplit => unimplemented!("lowering {}", op),
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -2458,11 +2458,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::TlsValue => {
|
Opcode::TlsValue => {
|
||||||
panic!("Thread-local storage support not implemented!");
|
unimplemented!("Thread-local storage support not implemented!");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
|
Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
|
||||||
panic!("Pinned register support not implemented!");
|
unimplemented!("Pinned register support not implemented!");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Icmp => {
|
Opcode::Icmp => {
|
||||||
@@ -2679,10 +2679,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
assert!(is_valid_atomic_transaction_ty(ty));
|
assert!(is_valid_atomic_transaction_ty(ty));
|
||||||
if endianness == Endianness::Little {
|
if endianness == Endianness::Little {
|
||||||
panic!("Little-endian atomic operations not implemented");
|
unimplemented!("Little-endian atomic operations not implemented");
|
||||||
}
|
}
|
||||||
if ty_bits(ty) < 32 {
|
if ty_bits(ty) < 32 {
|
||||||
panic!("Sub-word atomic operations not implemented");
|
unimplemented!("Sub-word atomic operations not implemented");
|
||||||
}
|
}
|
||||||
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
|
||||||
let (alu_op, rn) = match op {
|
let (alu_op, rn) = match op {
|
||||||
@@ -2701,7 +2701,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
});
|
});
|
||||||
(choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
|
(choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
|
||||||
}
|
}
|
||||||
_ => panic!("AtomicRmw operation type {:?} not implemented", op),
|
_ => unimplemented!("AtomicRmw operation type {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
let mem = MemArg::reg(addr, flags);
|
let mem = MemArg::reg(addr, flags);
|
||||||
ctx.emit(Inst::AtomicRmw {
|
ctx.emit(Inst::AtomicRmw {
|
||||||
@@ -2721,10 +2721,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
assert!(is_valid_atomic_transaction_ty(ty));
|
assert!(is_valid_atomic_transaction_ty(ty));
|
||||||
if endianness == Endianness::Little {
|
if endianness == Endianness::Little {
|
||||||
panic!("Little-endian atomic operations not implemented");
|
unimplemented!("Little-endian atomic operations not implemented");
|
||||||
}
|
}
|
||||||
if ty_bits(ty) < 32 {
|
if ty_bits(ty) < 32 {
|
||||||
panic!("Sub-word atomic operations not implemented");
|
unimplemented!("Sub-word atomic operations not implemented");
|
||||||
}
|
}
|
||||||
let mem = MemArg::reg(addr, flags);
|
let mem = MemArg::reg(addr, flags);
|
||||||
ctx.emit(Inst::gen_move(rd, rm, ty));
|
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||||
@@ -2865,13 +2865,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::UwidenLow
|
| Opcode::UwidenLow
|
||||||
| Opcode::UwidenHigh
|
| Opcode::UwidenHigh
|
||||||
| Opcode::WideningPairwiseDotProductS
|
| Opcode::WideningPairwiseDotProductS
|
||||||
|
| Opcode::SqmulRoundSat
|
||||||
| Opcode::FvpromoteLow
|
| Opcode::FvpromoteLow
|
||||||
| Opcode::Fvdemote => {
|
| Opcode::Fvdemote => {
|
||||||
// TODO
|
// TODO
|
||||||
panic!("Vector ops not implemented.");
|
unimplemented!("Vector ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),
|
Opcode::Isplit | Opcode::Iconcat => unimplemented!("Wide integer ops not implemented."),
|
||||||
|
|
||||||
Opcode::Spill
|
Opcode::Spill
|
||||||
| Opcode::Fill
|
| Opcode::Fill
|
||||||
|
|||||||
@@ -6001,6 +6001,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
unimplemented!("Vector split/concat ops not implemented.");
|
unimplemented!("Vector split/concat ops not implemented.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||||
|
|
||||||
// Opcodes that should be removed by legalization. These should
|
// Opcodes that should be removed by legalization. These should
|
||||||
// eventually be removed if/when we replace in-situ legalization with
|
// eventually be removed if/when we replace in-situ legalization with
|
||||||
// something better.
|
// something better.
|
||||||
|
|||||||
Binary file not shown.
@@ -574,6 +574,7 @@ where
|
|||||||
Opcode::AtomicStore => unimplemented!("AtomicStore"),
|
Opcode::AtomicStore => unimplemented!("AtomicStore"),
|
||||||
Opcode::Fence => unimplemented!("Fence"),
|
Opcode::Fence => unimplemented!("Fence"),
|
||||||
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
|
||||||
|
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
||||||
|
|
||||||
// TODO: these instructions should be removed once the new backend makes these obsolete
|
// TODO: these instructions should be removed once the new backend makes these obsolete
|
||||||
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
|
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
|
||||||
|
|||||||
@@ -1885,8 +1885,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let arg = pop1_with_bitcast(state, type_of(op), builder);
|
let arg = pop1_with_bitcast(state, type_of(op), builder);
|
||||||
state.push1(builder.ins().popcnt(arg));
|
state.push1(builder.ins().popcnt(arg));
|
||||||
}
|
}
|
||||||
Operator::I16x8Q15MulrSatS
|
Operator::I16x8Q15MulrSatS => {
|
||||||
| Operator::I16x8ExtMulLowI8x16S
|
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
|
||||||
|
|
||||||
|
state.push1(builder.ins().sqmul_round_sat(a, b))
|
||||||
|
}
|
||||||
|
Operator::I16x8ExtMulLowI8x16S
|
||||||
| Operator::I16x8ExtMulHighI8x16S
|
| Operator::I16x8ExtMulHighI8x16S
|
||||||
| Operator::I16x8ExtMulLowI8x16U
|
| Operator::I16x8ExtMulLowI8x16U
|
||||||
| Operator::I16x8ExtMulHighI8x16U
|
| Operator::I16x8ExtMulHighI8x16U
|
||||||
|
|||||||
Reference in New Issue
Block a user