[x64] Add i64x2.abs
This instruction has a single-instruction lowering in AVX512F/VL and a three-instruction lowering in AVX, but neither is currently supported in the x64 backend. To implement this, we instead subtract the vector from 0 and use a blending instruction to pick the lanes containing the absolute value.
This commit is contained in:
5
build.rs
5
build.rs
@@ -183,7 +183,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
|
|||||||
|
|
||||||
match (testsuite, testname) {
|
match (testsuite, testname) {
|
||||||
("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt
|
("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt
|
||||||
("simd", "simd_i64x2_arith2") => return true, // Unsupported feature: proposed simd operator I64x2Abs
|
|
||||||
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
|
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
|
||||||
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
|
||||||
("simd", "simd_i16x8_extmul_i8x16") => return true,
|
("simd", "simd_i16x8_extmul_i8x16") => return true,
|
||||||
@@ -231,7 +230,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
|
|
||||||
// These are new instructions that are not really implemented in any backend.
|
// These are new instructions that are not really implemented in any backend.
|
||||||
("simd", "simd_i8x16_arith2")
|
("simd", "simd_i8x16_arith2")
|
||||||
| ("simd", "simd_i64x2_arith2")
|
|
||||||
| ("simd", "simd_conversions")
|
| ("simd", "simd_conversions")
|
||||||
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
|
||||||
| ("simd", "simd_i16x8_extmul_i8x16")
|
| ("simd", "simd_i16x8_extmul_i8x16")
|
||||||
@@ -250,6 +248,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
| ("simd", "simd_store64_lane")
|
| ("simd", "simd_store64_lane")
|
||||||
| ("simd", "simd_store8_lane") => return true,
|
| ("simd", "simd_store8_lane") => return true,
|
||||||
|
|
||||||
|
// These are only implemented on x64.
|
||||||
|
("simd", "simd_i64x2_arith2") => return !cfg!(feature = "experimental_x64"),
|
||||||
|
|
||||||
// These are only implemented on aarch64 and x64.
|
// These are only implemented on aarch64 and x64.
|
||||||
("simd", "simd_i64x2_cmp")
|
("simd", "simd_i64x2_cmp")
|
||||||
| ("simd", "simd_f32x4_pmin_pmax")
|
| ("simd", "simd_f32x4_pmin_pmax")
|
||||||
|
|||||||
@@ -470,6 +470,7 @@ pub enum SseOpcode {
|
|||||||
Andpd,
|
Andpd,
|
||||||
Andnps,
|
Andnps,
|
||||||
Andnpd,
|
Andnpd,
|
||||||
|
Blendvpd,
|
||||||
Comiss,
|
Comiss,
|
||||||
Comisd,
|
Comisd,
|
||||||
Cmpps,
|
Cmpps,
|
||||||
@@ -758,7 +759,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Palignr
|
| SseOpcode::Palignr
|
||||||
| SseOpcode::Pshufb => SSSE3,
|
| SseOpcode::Pshufb => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps
|
SseOpcode::Blendvpd
|
||||||
|
| SseOpcode::Insertps
|
||||||
| SseOpcode::Packusdw
|
| SseOpcode::Packusdw
|
||||||
| SseOpcode::Pcmpeqq
|
| SseOpcode::Pcmpeqq
|
||||||
| SseOpcode::Pextrb
|
| SseOpcode::Pextrb
|
||||||
@@ -816,6 +818,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Andps => "andps",
|
SseOpcode::Andps => "andps",
|
||||||
SseOpcode::Andnps => "andnps",
|
SseOpcode::Andnps => "andnps",
|
||||||
SseOpcode::Andnpd => "andnpd",
|
SseOpcode::Andnpd => "andnpd",
|
||||||
|
SseOpcode::Blendvpd => "blendvpd",
|
||||||
SseOpcode::Cmpps => "cmpps",
|
SseOpcode::Cmpps => "cmpps",
|
||||||
SseOpcode::Cmppd => "cmppd",
|
SseOpcode::Cmppd => "cmppd",
|
||||||
SseOpcode::Cmpss => "cmpss",
|
SseOpcode::Cmpss => "cmpss",
|
||||||
|
|||||||
@@ -1845,6 +1845,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||||
|
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
||||||
SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
|
SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
|
||||||
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
|
SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
|
||||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||||
|
|||||||
@@ -3426,6 +3426,12 @@ fn test_x64_emit() {
|
|||||||
"orps %xmm5, %xmm4",
|
"orps %xmm5, %xmm4",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4),
|
||||||
|
"66410F3815E7",
|
||||||
|
"blendvpd %xmm15, %xmm4",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// XMM_RM_R: Integer Packed
|
// XMM_RM_R: Integer Packed
|
||||||
|
|
||||||
|
|||||||
@@ -1853,7 +1853,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if ty.is_vector() {
|
if ty == types::I64X2 {
|
||||||
|
// This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction.
|
||||||
|
// Instead, we use a separate register, `tmp`, to contain the results of `0 - src`
|
||||||
|
// and then use `BLENDVPD` to put back the original `src` lane wherever the MSB of `tmp` is 1
|
||||||
|
// (i.e. if `tmp` was negative or, conversely, if `src` was originally positive).
|
||||||
|
|
||||||
|
// Emit all 0s into the `tmp` register.
|
||||||
|
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
|
||||||
|
// Subtract the lanes from 0 and set up `dst`.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
|
||||||
|
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
|
||||||
|
// Choose the original `src` lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
|
||||||
|
// require the "choice" mask to be in XMM0.
|
||||||
|
ctx.emit(Inst::gen_move(
|
||||||
|
Writable::from_reg(regs::xmm0()),
|
||||||
|
tmp.to_reg(),
|
||||||
|
ty,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
|
||||||
|
} else if ty.is_vector() {
|
||||||
let opcode = match ty {
|
let opcode = match ty {
|
||||||
types::I8X16 => SseOpcode::Pabsb,
|
types::I8X16 => SseOpcode::Pabsb,
|
||||||
types::I16X8 => SseOpcode::Pabsw,
|
types::I16X8 => SseOpcode::Pabsw,
|
||||||
|
|||||||
@@ -1564,7 +1564,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
let a = pop1_with_bitcast(state, type_of(op), builder);
|
let a = pop1_with_bitcast(state, type_of(op), builder);
|
||||||
state.push1(builder.ins().ineg(a))
|
state.push1(builder.ins().ineg(a))
|
||||||
}
|
}
|
||||||
Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs => {
|
Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs | Operator::I64x2Abs => {
|
||||||
let a = pop1_with_bitcast(state, type_of(op), builder);
|
let a = pop1_with_bitcast(state, type_of(op), builder);
|
||||||
state.push1(builder.ins().iabs(a))
|
state.push1(builder.ins().iabs(a))
|
||||||
}
|
}
|
||||||
@@ -1852,7 +1852,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::I64x2ExtMulHighI32x4S
|
| Operator::I64x2ExtMulHighI32x4S
|
||||||
| Operator::I64x2ExtMulLowI32x4U
|
| Operator::I64x2ExtMulLowI32x4U
|
||||||
| Operator::I64x2ExtMulHighI32x4U
|
| Operator::I64x2ExtMulHighI32x4U
|
||||||
| Operator::I64x2Abs
|
|
||||||
| Operator::I64x2AllTrue
|
| Operator::I64x2AllTrue
|
||||||
| Operator::I16x8ExtAddPairwiseI8x16S
|
| Operator::I16x8ExtAddPairwiseI8x16S
|
||||||
| Operator::I16x8ExtAddPairwiseI8x16U
|
| Operator::I16x8ExtAddPairwiseI8x16U
|
||||||
|
|||||||
Reference in New Issue
Block a user