diff --git a/build.rs b/build.rs index 86877ff877..bd8758c4e2 100644 --- a/build.rs +++ b/build.rs @@ -182,7 +182,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str } match (testsuite, testname) { - ("simd", "simd_i64x2_cmp") => return true, ("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt ("simd", "simd_i64x2_arith2") => return true, // Unsupported feature: proposed simd operator I64x2Abs ("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6 @@ -231,8 +230,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_boolean") | ("simd", "simd_lane") => return true, // These are new instructions that are not really implemented in any backend. - ("simd", "simd_i64x2_cmp") - | ("simd", "simd_i8x16_arith2") + ("simd", "simd_i8x16_arith2") | ("simd", "simd_i64x2_arith2") | ("simd", "simd_conversions") | ("simd", "simd_i16x8_extadd_pairwise_i8x16") @@ -252,6 +250,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { | ("simd", "simd_store64_lane") | ("simd", "simd_store8_lane") => return true, + // These are only implemented on x64. + ("simd", "simd_i64x2_cmp") => return !cfg!(feature = "experimental_x64"), + // These are only implemented on aarch64 and x64. ("simd", "simd_f32x4_pmin_pmax") | ("simd", "simd_f64x2_pmin_pmax") diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 0b88d79a24..827edc69ce 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3185,11 +3185,27 @@ fn lower_insn_to_regs>( ), }; - // Here we decide which operand to use as the read/write `dst` (ModRM reg field) - // and which to use as the read `input` (ModRM r/m field). 
In the normal case we - // use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for - // the less-than cases so that we can reuse the greater-than implementation. + // Here we decide which operand to use as the read/write `dst` (ModRM reg field) and + // which to use as the read `input` (ModRM r/m field). In the normal case we use + // Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the + // less-than cases so that we can reuse the greater-than implementation. + // + // In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped + // from the normal order because of the special-case lowering for these instructions + // (i.e. we use PCMPGTQ with flipped operands and negate the result). let input = match condcode { + IntCC::SignedLessThanOrEqual if ty == types::I64X2 => { + let lhs = put_input_in_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem(ctx, inputs[1]); + ctx.emit(Inst::gen_move(dst, lhs, ty)); + rhs + } + IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => { + let lhs = input_to_reg_mem(ctx, inputs[0]); + let rhs = put_input_in_reg(ctx, inputs[1]); + ctx.emit(Inst::gen_move(dst, rhs, ty)); + lhs + } IntCC::SignedLessThan | IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThan @@ -3220,10 +3236,25 @@ fn lower_insn_to_regs>( IntCC::SignedGreaterThan | IntCC::SignedLessThan => { ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst)) } - IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => { + IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual + if ty != types::I64X2 => + { ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) } + IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual + if ty == types::I64X2 => + { + // The PMINSQ instruction is only available in AVX512VL/F so we must instead + // compare with flipped operands and negate the result (emitting one more + // instruction).
+ ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst)); + // Emit all 1s into the `tmp` register. + let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp)); + // Invert the result of the `PCMPGT*`. + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst)); + } IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => { ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index cc57efbacd..adeb7bda2d 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1642,16 +1642,16 @@ pub fn translate_operator( let a = pop1_with_bitcast(state, type_of(op), builder); state.push1(builder.ins().vhigh_bits(I32, a)); } - Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => { + Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq | Operator::I64x2Eq => { translate_vector_icmp(IntCC::Equal, type_of(op), builder, state) } - Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne => { + Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne | Operator::I64x2Ne => { translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state) } - Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS => { + Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS | Operator::I64x2GtS => { translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state) } - Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS => { + Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS | Operator::I64x2LtS => { translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state) } Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => { @@ -1660,10 +1660,10 @@ pub fn translate_operator( Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => { translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), 
builder, state) } - Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS => { + Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS | Operator::I64x2GeS => { translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state) } - Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS => { + Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS | Operator::I64x2LeS => { translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state) } Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp( @@ -1852,12 +1852,6 @@ pub fn translate_operator( | Operator::I64x2ExtMulHighI32x4S | Operator::I64x2ExtMulLowI32x4U | Operator::I64x2ExtMulHighI32x4U - | Operator::I64x2Eq - | Operator::I64x2Ne - | Operator::I64x2LtS - | Operator::I64x2GtS - | Operator::I64x2LeS - | Operator::I64x2GeS | Operator::I64x2Abs | Operator::I64x2AllTrue | Operator::I16x8ExtAddPairwiseI8x16S @@ -2646,7 +2640,14 @@ fn type_of(operator: &Operator) -> Type { | Operator::V128Load64Splat { .. } | Operator::I64x2ExtractLane { .. } | Operator::I64x2ReplaceLane { .. } + | Operator::I64x2Eq + | Operator::I64x2Ne + | Operator::I64x2LtS + | Operator::I64x2GtS + | Operator::I64x2LeS + | Operator::I64x2GeS | Operator::I64x2Neg + | Operator::I64x2Abs | Operator::I64x2Shl | Operator::I64x2ShrS | Operator::I64x2ShrU