[x64] Add the new i64x2 comparisons

This commit is contained in:
Andrew Brown
2021-02-26 14:55:58 -08:00
parent 44e76fe9c0
commit d730f18a78
3 changed files with 53 additions and 20 deletions

View File

@@ -182,7 +182,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
} }
match (testsuite, testname) { match (testsuite, testname) {
("simd", "simd_i64x2_cmp") => return true,
("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt ("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt
("simd", "simd_i64x2_arith2") => return true, // Unsupported feature: proposed simd operator I64x2Abs ("simd", "simd_i64x2_arith2") => return true, // Unsupported feature: proposed simd operator I64x2Abs
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6 ("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
@@ -231,8 +230,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", "simd_boolean") | ("simd", "simd_lane") => return true, ("simd", "simd_boolean") | ("simd", "simd_lane") => return true,
// These are new instructions that are not really implemented in any backend. // These are new instructions that are not really implemented in any backend.
("simd", "simd_i64x2_cmp") ("simd", "simd_i8x16_arith2")
| ("simd", "simd_i8x16_arith2")
| ("simd", "simd_i64x2_arith2") | ("simd", "simd_i64x2_arith2")
| ("simd", "simd_conversions") | ("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16") | ("simd", "simd_i16x8_extadd_pairwise_i8x16")
@@ -252,6 +250,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
| ("simd", "simd_store64_lane") | ("simd", "simd_store64_lane")
| ("simd", "simd_store8_lane") => return true, | ("simd", "simd_store8_lane") => return true,
// These are only implemented on x64.
("simd", "simd_i64x2_cmp") => return !cfg!(feature = "experimental_x64"),
// These are only implemented on aarch64 and x64. // These are only implemented on aarch64 and x64.
("simd", "simd_f32x4_pmin_pmax") ("simd", "simd_f32x4_pmin_pmax")
| ("simd", "simd_f64x2_pmin_pmax") | ("simd", "simd_f64x2_pmin_pmax")

View File

@@ -3185,11 +3185,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
), ),
}; };
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) // Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
// and which to use as the read `input` (ModRM r/m field). In the normal case we // which to use as the read `input` (ModRM r/m field). In the normal case we use
// use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for // Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
// the less-than cases so that we can reuse the greater-than implementation. // less-than cases so that we can reuse the greater-than implementation.
//
// In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped
// from the normal order because of the special-case lowering for these instructions
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
let input = match condcode { let input = match condcode {
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
IntCC::SignedLessThan IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual | IntCC::SignedLessThanOrEqual
| IntCC::UnsignedLessThan | IntCC::UnsignedLessThan
@@ -3220,10 +3236,25 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
IntCC::SignedGreaterThan | IntCC::SignedLessThan => { IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst)) ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
} }
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => { IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty != types::I64X2 =>
{
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
} }
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty == types::I64X2 =>
{
// The 64-bit-lane PMINSQ instruction is only available in AVX512VL/F (unlike
// PMINSB/PMINSW/PMINSD) so we must instead compare using PCMPGTQ with flipped
// operands and negate the result (emitting one more instruction).
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPGT*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => { IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));

View File

@@ -1642,16 +1642,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, type_of(op), builder); let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().vhigh_bits(I32, a)); state.push1(builder.ins().vhigh_bits(I32, a));
} }
Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => { Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq | Operator::I64x2Eq => {
translate_vector_icmp(IntCC::Equal, type_of(op), builder, state) translate_vector_icmp(IntCC::Equal, type_of(op), builder, state)
} }
Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne => { Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne | Operator::I64x2Ne => {
translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state) translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state)
} }
Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS => { Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS | Operator::I64x2GtS => {
translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state) translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state)
} }
Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS => { Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS | Operator::I64x2LtS => {
translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state) translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state)
} }
Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => { Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => {
@@ -1660,10 +1660,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => { Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => {
translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), builder, state) translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), builder, state)
} }
Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS => { Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS | Operator::I64x2GeS => {
translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state) translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state)
} }
Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS => { Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS | Operator::I64x2LeS => {
translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state) translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state)
} }
Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp( Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp(
@@ -1852,12 +1852,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I64x2ExtMulHighI32x4S | Operator::I64x2ExtMulHighI32x4S
| Operator::I64x2ExtMulLowI32x4U | Operator::I64x2ExtMulLowI32x4U
| Operator::I64x2ExtMulHighI32x4U | Operator::I64x2ExtMulHighI32x4U
| Operator::I64x2Eq
| Operator::I64x2Ne
| Operator::I64x2LtS
| Operator::I64x2GtS
| Operator::I64x2LeS
| Operator::I64x2GeS
| Operator::I64x2Abs | Operator::I64x2Abs
| Operator::I64x2AllTrue | Operator::I64x2AllTrue
| Operator::I16x8ExtAddPairwiseI8x16S | Operator::I16x8ExtAddPairwiseI8x16S
@@ -2646,7 +2640,14 @@ fn type_of(operator: &Operator) -> Type {
| Operator::V128Load64Splat { .. } | Operator::V128Load64Splat { .. }
| Operator::I64x2ExtractLane { .. } | Operator::I64x2ExtractLane { .. }
| Operator::I64x2ReplaceLane { .. } | Operator::I64x2ReplaceLane { .. }
| Operator::I64x2Eq
| Operator::I64x2Ne
| Operator::I64x2LtS
| Operator::I64x2GtS
| Operator::I64x2LeS
| Operator::I64x2GeS
| Operator::I64x2Neg | Operator::I64x2Neg
| Operator::I64x2Abs
| Operator::I64x2Shl | Operator::I64x2Shl
| Operator::I64x2ShrS | Operator::I64x2ShrS
| Operator::I64x2ShrU | Operator::I64x2ShrU