From 9e34df33b90327309b414b3774b909a5b87a2f67 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Fri, 18 Jun 2021 17:28:55 +0200 Subject: [PATCH 01/14] Remove the old x86 backend --- .github/workflows/main.yml | 22 - Cargo.toml | 3 - build.rs | 5 - cranelift/codegen/Cargo.toml | 3 - .../codegen/meta/src/isa/x86/encodings.rs | 2731 ------------- .../codegen/meta/src/isa/x86/instructions.rs | 723 ---- .../codegen/meta/src/isa/x86/legalize.rs | 827 ---- cranelift/codegen/meta/src/isa/x86/mod.rs | 78 +- cranelift/codegen/meta/src/isa/x86/opcodes.rs | 721 ---- cranelift/codegen/meta/src/isa/x86/recipes.rs | 3445 ----------------- .../codegen/meta/src/isa/x86/registers.rs | 43 - cranelift/codegen/shared/src/isa/mod.rs | 3 - .../shared/src/isa/x86/encoding_bits.rs | 419 -- cranelift/codegen/shared/src/isa/x86/mod.rs | 4 - cranelift/codegen/shared/src/lib.rs | 1 - .../codegen/src/isa/aarch64/lower_inst.rs | 39 - cranelift/codegen/src/isa/legacy/mod.rs | 8 - cranelift/codegen/src/isa/legacy/x86/abi.rs | 1102 ------ .../codegen/src/isa/legacy/x86/binemit.rs | 578 --- .../codegen/src/isa/legacy/x86/enc_tables.rs | 1894 --------- cranelift/codegen/src/isa/legacy/x86/mod.rs | 199 - .../codegen/src/isa/legacy/x86/registers.rs | 86 - .../codegen/src/isa/legacy/x86/settings.rs | 52 - .../codegen/src/isa/legacy/x86/unwind.rs | 531 --- .../src/isa/legacy/x86/unwind/systemv.rs | 235 -- .../src/isa/legacy/x86/unwind/winx64.rs | 265 -- cranelift/codegen/src/isa/mod.rs | 56 +- cranelift/codegen/src/isa/s390x/lower.rs | 39 - .../src/isa/x64/inst/unwind/systemv.rs | 2 - cranelift/codegen/src/isa/x64/lower.rs | 38 - cranelift/codegen/src/legalizer/mod.rs | 4 +- cranelift/codegen/src/machinst/adapter.rs | 6 +- .../filetests/isa/x64/amode-opt.clif | 2 +- cranelift/filetests/filetests/isa/x64/b1.clif | 2 +- .../filetests/filetests/isa/x64/basic.clif | 2 +- .../filetests/filetests/isa/x64/branches.clif | 2 +- .../filetests/isa/x64/call-conv.clif | 2 +- .../filetests/isa/x64/clz-lzcnt.clif | 2 +- .../filetests/isa/x64/cmp-mem-bug.clif | 2 +- .../filetests/filetests/isa/x64/ctz-bmi1.clif | 2 +- .../filetests/isa/x64/div-checks.clif | 2 +- .../filetests/filetests/isa/x64/fastcall.clif | 4 +- .../filetests/isa/x64/floating-point.clif | 2 +- .../filetests/filetests/isa/x64/heap.clif | 2 +- .../filetests/filetests/isa/x64/i128.clif | 14 +- .../filetests/filetests/isa/x64/load-op.clif | 2 +- .../filetests/isa/x64/move-elision.clif | 2 +- .../filetests/isa/x64/popcnt-use-popcnt.clif | 2 +- .../filetests/filetests/isa/x64/popcnt.clif | 2 +- .../filetests/isa/x64/probestack.clif | 2 +- .../filetests/isa/x64/select-i128.clif | 4 +- .../isa/x64/simd-bitwise-compile.clif | 2 +- .../isa/x64/simd-comparison-legalize.clif | 2 +- .../isa/x64/simd-lane-access-compile.clif | 2 +- .../isa/x64/simd-logical-compile.clif | 2 +- .../isa/x64/store-stack-full-width-i32.clif | 2 +- .../filetests/isa/x64/struct-arg.clif | 2 +- .../filetests/isa/x64/struct-ret.clif | 2 +- .../filetests/filetests/isa/x64/tls_elf.clif | 2 +- .../filetests/isa/x64/uextend-elision.clif | 2 +- .../isa/x64/unused_jt_unreachable_block.clif | 2 +- .../filetests/filetests/isa/x86/abcd.clif | 13 - .../filetests/filetests/isa/x86/abi-bool.clif | 19 - .../filetests/filetests/isa/x86/abi32.clif | 20 - .../filetests/filetests/isa/x86/abi64.clif | 37 - .../isa/x86/allones_funcaddrs32.clif | 25 - .../isa/x86/allones_funcaddrs64.clif | 27 - .../isa/x86/baldrdash-table-sig-reg.clif | 14 - .../isa/x86/baseline_clz_ctz_popcount.clif | 92 - .../baseline_clz_ctz_popcount_encoding.clif | 87 - 
.../filetests/isa/x86/binary32-float.clif | 557 --- .../filetests/filetests/isa/x86/binary32.clif | 721 ---- .../filetests/isa/x86/binary64-float.clif | 638 --- .../filetests/isa/x86/binary64-pic.clif | 83 - .../filetests/filetests/isa/x86/binary64.clif | 1692 -------- .../filetests/filetests/isa/x86/br-i128.clif | 42 - .../filetests/filetests/isa/x86/brz-i8.clif | 38 - .../filetests/isa/x86/brz-x86_32-i64.clif | 36 - .../filetests/isa/x86/extend-i128.clif | 37 - .../filetests/isa/x86/extend-i64.clif | 37 - .../floating-point-zero-constants-32bit.clif | 17 - .../x86/floating-point-zero-constants.clif | 31 - .../isa/x86/i128-isplit-forward-jump.clif | 25 - .../filetests/filetests/isa/x86/i128.clif | 46 - .../filetests/isa/x86/ireduce-i16-to-i8.clif | 8 - .../isa/x86/isplit-not-legalized-twice.clif | 20 - .../filetests/isa/x86/isub_imm-i8.clif | 14 - .../isa/x86/jump_i128_param_unused.clif | 10 - .../filetests/isa/x86/legalize-bint-i8.clif | 10 - .../filetests/isa/x86/legalize-bnot.clif | 28 - .../filetests/isa/x86/legalize-br-icmp.clif | 46 - .../filetests/isa/x86/legalize-br-table.clif | 31 - .../isa/x86/legalize-byte-ops-i8.clif | 36 - .../filetests/isa/x86/legalize-call.clif | 14 - .../isa/x86/legalize-clz-ctz-i8.clif | 25 - .../filetests/isa/x86/legalize-custom.clif | 133 - .../filetests/isa/x86/legalize-div-traps.clif | 192 - .../filetests/isa/x86/legalize-div.clif | 57 - .../isa/x86/legalize-f64const-x64.clif | 13 - .../isa/x86/legalize-fcvt_from_usint-i16.clif | 14 - .../filetests/isa/x86/legalize-heaps.clif | 123 - .../filetests/isa/x86/legalize-i128.clif | 20 - .../filetests/isa/x86/legalize-i64.clif | 357 -- .../filetests/isa/x86/legalize-icmp-i8.clif | 19 - .../filetests/isa/x86/legalize-iconst-i8.clif | 18 - .../filetests/isa/x86/legalize-imul-i8.clif | 11 - .../isa/x86/legalize-imul-imm-i8.clif | 15 - .../isa/x86/legalize-ineg-x86_64.clif | 13 - .../isa/x86/legalize-ireduce-i128.clif | 11 - .../isa/x86/legalize-ireduce-i64.clif | 11 - .../isa/x86/legalize-isplit-backwards.clif | 24 - .../filetests/isa/x86/legalize-libcall.clif | 15 - .../isa/x86/legalize-load-store-i8.clif | 31 - .../filetests/isa/x86/legalize-memory.clif | 115 - .../filetests/isa/x86/legalize-mulhi.clif | 43 - .../filetests/isa/x86/legalize-popcnt-i8.clif | 9 - .../isa/x86/legalize-regmove-i8.clif | 36 - .../filetests/isa/x86/legalize-rotate.clif | 35 - .../filetests/isa/x86/legalize-shlr-i8.clif | 24 - .../filetests/isa/x86/legalize-tables.clif | 73 - .../filetests/isa/x86/legalize-urem-i8.clif | 15 - .../isa/x86/legalize-x86_32-shifts.clif | 51 - .../filetests/isa/x86/load-store-narrow.clif | 16 - .../filetests/filetests/isa/x86/nop.clif | 10 - .../x86/optimized-zero-constants-32bit.clif | 52 - .../isa/x86/optimized-zero-constants.clif | 72 - .../filetests/isa/x86/pinned-reg.clif | 74 - .../isa/x86/probestack-adjusts-sp.clif | 28 - .../isa/x86/probestack-disabled.clif | 24 - .../isa/x86/probestack-noncolocated.clif | 27 - .../filetests/isa/x86/probestack-size.clif | 74 - .../filetests/isa/x86/probestack.clif | 49 - .../filetests/isa/x86/prologue-epilogue.clif | 314 -- .../filetests/isa/x86/relax_branch.clif | 132 - .../isa/x86/saturating-float-cast.clif | 13 - .../filetests/isa/x86/select-i8.clif | 8 - .../isa/x86/shrink-multiple-uses.clif | 18 - .../filetests/filetests/isa/x86/shrink.clif | 40 - .../isa/x86/simd-arithmetic-binemit.clif | 116 - .../isa/x86/simd-arithmetic-legalize.clif | 117 - .../x86/simd-avx512-arithmetic-binemit.clif | 17 - .../x86/simd-avx512-arithmetic-legalize.clif | 10 - 
.../x86/simd-avx512-conversion-binemit.clif | 9 - .../x86/simd-avx512-conversion-legalize.clif | 10 - .../isa/x86/simd-bitwise-binemit.clif | 99 - .../isa/x86/simd-bitwise-legalize.clif | 111 - .../isa/x86/simd-comparison-binemit.clif | 138 - .../isa/x86/simd-comparison-legalize.clif | 40 - .../isa/x86/simd-conversion-binemit.clif | 26 - .../isa/x86/simd-conversion-legalize.clif | 70 - .../simd-lane-access-binemit-for-size.clif | 34 - .../isa/x86/simd-lane-access-binemit.clif | 126 - .../isa/x86/simd-lane-access-compile.clif | 19 - .../isa/x86/simd-lane-access-legalize.clif | 101 - .../isa/x86/simd-logical-binemit.clif | 33 - .../isa/x86/simd-logical-legalize.clif | 31 - .../isa/x86/simd-logical-rodata.clif | 11 - .../isa/x86/simd-memory-binemit.clif | 85 - .../filetests/isa/x86/simd-pextr-binemit.clif | 22 - .../isa/x86/simd-vconst-binemit.clif | 29 - .../isa/x86/simd-vconst-compile.clif | 16 - .../x86/simd-vconst-optimized-binemit.clif | 10 - .../filetests/isa/x86/simd-vconst-rodata.clif | 49 - .../isa/x86/simd-vselect-binemit.clif | 27 - .../simd-vselect-legalize-to-bitselect.clif | 45 - .../filetests/isa/x86/stack-addr32.clif | 33 - .../filetests/isa/x86/stack-addr64.clif | 45 - .../filetests/isa/x86/stack-load-store64.clif | 21 - .../filetests/isa/x86/stack-load-store8.clif | 19 - .../filetests/isa/x86/struct-arg.clif | 117 - .../filetests/isa/x86/systemv_x64_unwind.clif | 205 - .../filetests/filetests/isa/x86/tls_elf.clif | 18 - .../filetests/filetests/isa/x86/tls_enc.clif | 11 - .../filetests/isa/x86/tls_macho.clif | 18 - .../filetests/isa/x86/uextend-i8-to-i16.clif | 14 - .../isa/x86/windows_fastcall_x64.clif | 255 -- .../isa/x86/windows_fastcall_x64_unwind.clif | 250 -- .../filetests/legalizer/bitrev-i128.clif | 89 - .../filetests/filetests/legalizer/bitrev.clif | 206 - .../filetests/legalizer/br_table_cond.clif | 64 - .../filetests/legalizer/empty_br_table.clif | 17 - .../filetests/legalizer/icmp_imm_i128.clif | 23 - .../filetests/legalizer/pass_by_ref.clif | 31 - .../filetests/legalizer/popcnt-i128.clif | 21 - ...plify_instruction_into_alias_of_value.clif | 7 +- .../filetests/filetests/postopt/basic.clif | 125 - .../filetests/postopt/complex_memory_ops.clif | 94 - .../postopt/fold_offset_into_address.clif | 32 - .../filetests/filetests/regalloc/aliases.clif | 35 - .../filetests/filetests/regalloc/basic.clif | 80 - .../filetests/regalloc/coalesce.clif | 157 - .../filetests/regalloc/coalescing-207.clif | 1527 -------- .../filetests/regalloc/coalescing-216.clif | 87 - .../filetests/regalloc/coloring-227.clif | 115 - .../filetests/regalloc/constraints.clif | 82 - .../regalloc/fallthrough-return.clif | 23 - .../filetests/regalloc/ghost-param.clif | 45 - .../regalloc/global-constraints.clif | 30 - .../filetests/regalloc/global-fixed.clif | 16 - .../regalloc/gpr-deref-safe-335.clif | 44 - .../regalloc/infinite-interference.clif | 37 - .../filetests/filetests/regalloc/iterate.clif | 164 - .../filetests/regalloc/multi-constraints.clif | 51 - .../filetests/regalloc/multiple-returns.clif | 23 - .../regalloc/output-interference.clif | 14 - .../filetests/regalloc/reload-208.clif | 112 - .../filetests/regalloc/reload-779.clif | 23 - .../filetests/filetests/regalloc/reload.clif | 46 - .../filetests/regalloc/schedule-moves.clif | 39 - .../regalloc/solver-fixedconflict-var-2.clif | 100 - .../regalloc/solver-fixedconflict-var-3.clif | 137 - .../regalloc/solver-fixedconflict-var.clif | 173 - .../filetests/regalloc/spill-noregs.clif | 175 - .../filetests/filetests/regalloc/spill.clif | 223 -- 
.../filetests/regalloc/unreachable_code.clif | 47 - .../filetests/regalloc/x86-regres.clif | 49 - .../regress/allow-relaxation-shrink.clif | 57 - .../filetests/filetests/safepoint/basic.clif | 71 - .../filetests/filetests/safepoint/call.clif | 58 - ...plify_instruction_into_alias_of_value.clif | 18 - .../filetests/filetests/stack_maps/call.clif | 103 - .../filetests/stack_maps/incoming_args.clif | 30 - .../filetests/filetests/verifier/flags.clif | 77 - .../filetests/wasm/multi-val-b1.clif | 68 - .../wasm/multi-val-call-indirect.clif | 26 - .../wasm/multi-val-call-legalize-args.clif | 24 - .../multi-val-reuse-ret-ptr-stack-slot.clif | 61 - .../wasm/multi-val-sret-slot-alignment.clif | 51 - cranelift/filetests/src/function_runner.rs | 6 +- cranelift/filetests/src/test_run.rs | 3 +- cranelift/interpreter/src/step.rs | 38 - cranelift/native/src/lib.rs | 18 +- cranelift/reader/src/parser.rs | 16 +- cranelift/tests/bugpoint_test.clif | 3 +- crates/bench-api/Cargo.toml | 1 - crates/cranelift/Cargo.toml | 1 - crates/wasmtime/Cargo.toml | 3 - crates/wasmtime/src/func.rs | 4 - crates/wasmtime/src/module/registry.rs | 47 - examples/multi.rs | 9 - tests/all/debug/lldb.rs | 38 - tests/all/debug/translate.rs | 23 - tests/all/func.rs | 3 - tests/all/gc.rs | 4 - tests/all/relocs.rs | 2 - tests/all/wast.rs | 6 - 246 files changed, 76 insertions(+), 28804 deletions(-) delete mode 100644 cranelift/codegen/meta/src/isa/x86/encodings.rs delete mode 100644 cranelift/codegen/meta/src/isa/x86/instructions.rs delete mode 100644 cranelift/codegen/meta/src/isa/x86/legalize.rs delete mode 100644 cranelift/codegen/meta/src/isa/x86/opcodes.rs delete mode 100644 cranelift/codegen/meta/src/isa/x86/recipes.rs delete mode 100644 cranelift/codegen/meta/src/isa/x86/registers.rs delete mode 100644 cranelift/codegen/shared/src/isa/mod.rs delete mode 100644 cranelift/codegen/shared/src/isa/x86/encoding_bits.rs delete mode 100644 cranelift/codegen/shared/src/isa/x86/mod.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/abi.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/binemit.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/enc_tables.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/mod.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/registers.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/settings.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/unwind.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs delete mode 100644 cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs delete mode 100644 cranelift/filetests/filetests/isa/x86/abcd.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/abi-bool.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/abi32.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/abi64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/binary32-float.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/binary32.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/binary64-float.clif delete mode 100644 
cranelift/filetests/filetests/isa/x86/binary64-pic.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/binary64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/br-i128.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/brz-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/extend-i128.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/extend-i64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/i128.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-bnot.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-br-table.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-call.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-custom.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-div.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-heaps.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-i128.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-i64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-libcall.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-memory.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif delete mode 100644 
cranelift/filetests/filetests/isa/x86/legalize-rotate.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-tables.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/load-store-narrow.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/nop.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/pinned-reg.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/probestack-disabled.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/probestack-size.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/probestack.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/relax_branch.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/select-i8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/shrink.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif delete mode 100644 
cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/stack-addr32.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/stack-addr64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/stack-load-store64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/stack-load-store8.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/struct-arg.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/tls_elf.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/tls_enc.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/tls_macho.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif delete mode 100644 cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif delete mode 100644 cranelift/filetests/filetests/legalizer/bitrev-i128.clif delete mode 100644 cranelift/filetests/filetests/legalizer/bitrev.clif delete mode 100644 cranelift/filetests/filetests/legalizer/br_table_cond.clif delete mode 100644 cranelift/filetests/filetests/legalizer/empty_br_table.clif delete mode 100644 cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif delete mode 100644 cranelift/filetests/filetests/legalizer/pass_by_ref.clif delete mode 100644 cranelift/filetests/filetests/legalizer/popcnt-i128.clif delete mode 100644 cranelift/filetests/filetests/postopt/basic.clif delete mode 100644 cranelift/filetests/filetests/postopt/complex_memory_ops.clif delete mode 100644 cranelift/filetests/filetests/postopt/fold_offset_into_address.clif delete mode 100644 cranelift/filetests/filetests/regalloc/aliases.clif delete mode 100644 cranelift/filetests/filetests/regalloc/basic.clif delete mode 100644 cranelift/filetests/filetests/regalloc/coalesce.clif delete mode 100644 cranelift/filetests/filetests/regalloc/coalescing-207.clif delete mode 100644 cranelift/filetests/filetests/regalloc/coalescing-216.clif delete mode 100644 cranelift/filetests/filetests/regalloc/coloring-227.clif delete mode 100644 cranelift/filetests/filetests/regalloc/constraints.clif delete mode 100644 cranelift/filetests/filetests/regalloc/fallthrough-return.clif delete mode 100644 cranelift/filetests/filetests/regalloc/ghost-param.clif delete mode 100644 cranelift/filetests/filetests/regalloc/global-constraints.clif delete mode 100644 cranelift/filetests/filetests/regalloc/global-fixed.clif delete mode 100644 cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif delete mode 100644 cranelift/filetests/filetests/regalloc/infinite-interference.clif delete mode 100644 cranelift/filetests/filetests/regalloc/iterate.clif delete mode 100644 cranelift/filetests/filetests/regalloc/multi-constraints.clif delete mode 100644 cranelift/filetests/filetests/regalloc/multiple-returns.clif delete mode 100644 cranelift/filetests/filetests/regalloc/output-interference.clif delete mode 100644 
cranelift/filetests/filetests/regalloc/reload-208.clif delete mode 100644 cranelift/filetests/filetests/regalloc/reload-779.clif delete mode 100644 cranelift/filetests/filetests/regalloc/reload.clif delete mode 100644 cranelift/filetests/filetests/regalloc/schedule-moves.clif delete mode 100644 cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif delete mode 100644 cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif delete mode 100644 cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif delete mode 100644 cranelift/filetests/filetests/regalloc/spill-noregs.clif delete mode 100644 cranelift/filetests/filetests/regalloc/spill.clif delete mode 100644 cranelift/filetests/filetests/regalloc/unreachable_code.clif delete mode 100644 cranelift/filetests/filetests/regalloc/x86-regres.clif delete mode 100644 cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif delete mode 100644 cranelift/filetests/filetests/safepoint/basic.clif delete mode 100644 cranelift/filetests/filetests/safepoint/call.clif delete mode 100644 cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif delete mode 100644 cranelift/filetests/filetests/stack_maps/call.clif delete mode 100644 cranelift/filetests/filetests/stack_maps/incoming_args.clif delete mode 100644 cranelift/filetests/filetests/verifier/flags.clif delete mode 100644 cranelift/filetests/filetests/wasm/multi-val-b1.clif delete mode 100644 cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif delete mode 100644 cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif delete mode 100644 cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif delete mode 100644 cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 500c743f97..c38b7cc1a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -328,28 +328,6 @@ jobs: env: RUST_BACKTRACE: 1 - # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. - test_x86: - name: Test old x86 backend - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - uses: ./.github/actions/install-rust - with: - toolchain: stable - - # Install wasm32 targets in order to build various tests throughout the - # repo. - - run: rustup target add wasm32-wasi - - run: rustup target add wasm32-unknown-unknown - - # Run the old x86 backend CI (we will eventually remove this). - - run: ./ci/run-tests.sh --features old-x86-backend --locked - env: - RUST_BACKTRACE: 1 - # Build and test the wasi-nn module. test_wasi_nn: name: Test wasi-nn module diff --git a/Cargo.toml b/Cargo.toml index 08e0c0eb72..d69e3ac062 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,9 +100,6 @@ posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"] # backend is the default now. experimental_x64 = [] -# Use the old x86 backend. -old-x86-backend = ["wasmtime/old-x86-backend"] - [badges] maintenance = { status = "actively-developed" } diff --git a/build.rs b/build.rs index 06f0669cdf..cc6d3e5047 100644 --- a/build.rs +++ b/build.rs @@ -182,11 +182,6 @@ fn write_testsuite_tests( fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { "Cranelift" => match (testsuite, testname) { - // Skip all reference types tests on the old backend. 
The modern - // implementation of reference types uses atomic instructions - // for reference counts on `externref`, but the old backend does not - // implement atomic instructions. - ("reference_types", _) if cfg!(feature = "old-x86-backend") => return true, // No simd support yet for s390x. ("simd", _) if platform_is_s390x() => return true, // No memory64 support yet for s390x. diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 5bc6b2c9ca..4397568e5a 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -71,9 +71,6 @@ arm32 = [] # Work-in-progress codegen backend for ARM. # backend is the default now. experimental_x64 = [] -# Make the old x86 backend the default. -old-x86-backend = [] - # Option to enable all architectures. all-arch = [ "x86", diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs deleted file mode 100644 index 2f222defb5..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ /dev/null @@ -1,2731 +0,0 @@ -#![allow(non_snake_case)] - -use cranelift_codegen_shared::condcodes::IntCC; -use std::collections::HashMap; - -use crate::cdsl::encodings::{Encoding, EncodingBuilder}; -use crate::cdsl::instructions::{ - vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate, - InstructionPredicateNode, InstructionPredicateRegistry, -}; -use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes::*; - -use super::recipes::{RecipeGroup, Template}; -use crate::cdsl::instructions::BindParameter::Any; - -pub(crate) struct PerCpuModeEncodings { - pub enc32: Vec, - pub enc64: Vec, - pub recipes: Recipes, - recipes_by_name: HashMap, - pub inst_pred_reg: InstructionPredicateRegistry, -} - -impl PerCpuModeEncodings { - fn new() -> Self { - Self { - enc32: Vec::new(), - enc64: Vec::new(), - recipes: Recipes::new(), - recipes_by_name: HashMap::new(), - inst_pred_reg: InstructionPredicateRegistry::new(), - } - } - - fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber { - if let Some(found_index) = self.recipes_by_name.get(&recipe.name) { - assert!( - self.recipes[*found_index] == recipe, - "trying to insert different recipes with a same name ({})", - recipe.name - ); - *found_index - } else { - let recipe_name = recipe.name.clone(); - let index = self.recipes.push(recipe); - self.recipes_by_name.insert(recipe_name, index); - index - } - } - - fn make_encoding( - &mut self, - inst: InstSpec, - template: Template, - builder_closure: T, - ) -> Encoding - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let (recipe, bits) = template.build(); - let recipe_number = self.add_recipe(recipe); - let builder = EncodingBuilder::new(inst, recipe_number, bits); - builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg) - } - - fn enc32_func(&mut self, inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding); - } - fn enc32(&mut 
self, inst: impl Into, template: Template) { - self.enc32_func(inst, template, |x| x); - } - fn enc32_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc32_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc32_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc32.push(encoding); - } - - fn enc64_func(&mut self, inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc64.push(encoding); - } - fn enc64(&mut self, inst: impl Into, template: Template) { - self.enc64_func(inst, template, |x| x); - } - fn enc64_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc64_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc64_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc64.push(encoding); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. - /// - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // I32 on x86: no REX prefix. - self.enc32(inst.bind(I32), template.infer_rex()); - - // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I32), template.infer_rex()); - - // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// All variants of REX prefix are explicitly emitted, not inferred. - /// - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with and without REX. - fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - - // REX-less encoding must come after REX encoding so we don't use it by default. - // Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds B32/B64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. 
- /// - /// Adds encoding for `inst.b32` to X86_32. - /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. - /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. - fn enc_b32_b64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // B32 on x86: no REX prefix. - self.enc32(inst.bind(B32), template.infer_rex()); - - // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B32), template.infer_rex()); - - // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with a REX prefix. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(I32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r32` to X86_64 with and without REX. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. - fn enc_r32_r64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(R32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. - fn enc_r32_r64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(R32), template.nonrex()); - self.enc64(inst.bind(R64), template.rex().w()); - } - - fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(R32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. 
- self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R64).bind(Any), template); - } - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64(&mut self, inst: impl Into + Clone, template: Template) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64(inst.clone(), template.rex()); - self.enc64(inst, template); - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64_func(inst.clone(), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst, template, |builder| builder.inst_predicate(instp)); - } - fn enc_x86_64_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64_isap(inst.clone(), template.rex(), isap); - self.enc64_isap(inst, template, isap); - } - - /// Add all three encodings for `inst`: - /// - X86_32 - /// - X86_64 with and without the REX prefix. - fn enc_both(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc_x86_64(inst, template); - } - fn enc_both_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_isap(inst.clone(), template.clone(), isap); - self.enc_x86_64_isap(inst, template, isap); - } - fn enc_both_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_instp(inst.clone(), template.clone(), instp.clone()); - self.enc_x86_64_instp(inst, template, instp); - } - - /// Add two encodings for `inst`: - /// - X86_32, no REX prefix, since this is not valid in 32-bit mode. - /// - X86_64, dynamically infer the REX prefix. - fn enc_both_inferred(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.infer_rex()); - } - fn enc_both_inferred_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template.infer_rex(), isap); - } - - /// Add two encodings for `inst`: - /// - X86_32 - /// - X86_64 with the REX prefix. - fn enc_both_rex_only(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.rex()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` - /// argument to determine whether or not to set the REX.W bit. - fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(I32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. 
- self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I64).bind(Any), template); - } - } - - /// Add the same encoding/recipe pairing to both X86_32 and X86_64 - fn enc_32_64_rec( - &mut self, - inst: impl Clone + Into, - recipe: &EncodingRecipe, - bits: u16, - ) { - self.enc32_rec(inst.clone(), recipe, bits); - self.enc64_rec(inst, recipe, bits); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened - fn enc_32_64_func( - &mut self, - inst: impl Clone + Into, - template: Template, - builder_closure: T, - ) where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding.clone()); - self.enc64.push(encoding); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand - /// binding) has already happened. - fn enc_32_64_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template, isap); - } - - fn enc32_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc32(inst, template), - Some(isap) => self.enc32_isap(inst, template, isap), - } - } - - fn enc64_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc64(inst, template), - Some(isap) => self.enc64_isap(inst, template, isap), - } - } -} - -// Definitions. - -#[inline(never)] -fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let bconst = shared.by_name("bconst"); - let bint = shared.by_name("bint"); - let copy = shared.by_name("copy"); - let copy_special = shared.by_name("copy_special"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let get_pinned_reg = shared.by_name("get_pinned_reg"); - let iconst = shared.by_name("iconst"); - let ireduce = shared.by_name("ireduce"); - let regmove = shared.by_name("regmove"); - let sextend = shared.by_name("sextend"); - let set_pinned_reg = shared.by_name("set_pinned_reg"); - let uextend = shared.by_name("uextend"); - let dummy_sarg_t = shared.by_name("dummy_sarg_t"); - - // Shorthands for recipes. 
- let rec_copysp = r.template("copysp"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_get_pinned_reg = r.recipe("get_pinned_reg"); - let rec_null = r.recipe("null"); - let rec_pu_id = r.template("pu_id"); - let rec_pu_id_bool = r.template("pu_id_bool"); - let rec_pu_iq = r.template("pu_iq"); - let rec_rmov = r.template("rmov"); - let rec_set_pinned_reg = r.template("set_pinned_reg"); - let rec_u_id = r.template("u_id"); - let rec_u_id_z = r.template("u_id_z"); - let rec_umr = r.template("umr"); - let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); - let rec_urm_noflags = r.template("urm_noflags"); - let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); - let rec_dummy_sarg_t = r.recipe("dummy_sarg_t"); - - // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! - e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); - e.enc_x86_64( - set_pinned_reg.bind(I64), - rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), - ); - - e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. - for &ty in &[I8, I16, I32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - for &ty in &[B8, B16, B32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); - e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); - e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); - e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - - // Immediate constants. - e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); - e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - // The 32-bit immediate movl also zero-extends to 64 bits. - let is_unsigned_int32 = - InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); - - e.enc64_func( - iconst.bind(I64), - rec_pu_id.opcodes(&MOV_IMM).rex(), - |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), - ); - e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { - encoding.inst_predicate(is_unsigned_int32) - }); - - // Sign-extended 32-bit immediate. - e.enc64( - iconst.bind(I64), - rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), - ); - - // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. - e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); - - // Bool constants (uses MOV) - for &ty in &[B1, B8, B16, B32] { - e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); - } - e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); - - // You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be - // on 8-bit operands (f.ex "xor %al, %al"). 
Cranelift currently does not know when it can - // safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode. - let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); - e.enc_both_instp( - iconst.bind(I8), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - - // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that - // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not - // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these - // scenarios, so we explicitly select a wider but permissible opcode. - // - // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't - // an appropriate i16 encoding available. - e.enc_both_instp( - iconst.bind(I16), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_both_instp( - iconst.bind(I32), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); - - // Numerical conversions. - - // Reducing an integer is a no-op. - e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - - e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); - - // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending - // instructions for %al/%ax/%eax to %ax/%eax/%rax. - - // movsbl - e.enc32( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - - // movswl - e.enc32( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - - // movsbq - e.enc64( - sextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), - ); - - // movswq - e.enc64( - sextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), - ); - - // movslq - e.enc64( - sextend.bind(I64).bind(I32), - rec_urm_noflags.opcodes(&MOVSXD).rex().w(), - ); - - // movzbl - e.enc32( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwl - e.enc32( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // movzbq, encoded as movzbl because it's equivalent and shorter. 
- e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwq, encoded as movzwl because it's equivalent and shorter - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // A 32-bit register copy clears the high 32 bits. - e.enc64( - uextend.bind(I64).bind(I32), - rec_umr.opcodes(&MOV_STORE).rex(), - ); - e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); - - // Convert bool to int. - // - // This assumes that b1 is represented as an 8-bit low register with the value 0 - // or 1. - // - // Encode movzbq as movzbl, because it's equivalent and shorter. - for &to in &[I8, I16, I32, I64] { - for &from in &[B1, B8] { - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - if to != I64 { - e.enc32( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - } - } - for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] { - e.enc_both( - bint.bind(*to).bind(*from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - - // Copy Special - // For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. - e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); - e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); - - // Copy to SSA. These have to be done with special _rex_only encoders, because the standard - // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account - // the source register, which is specified directly in the instruction. - e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only( - copy_to_ssa.bind(I16), - rec_umr_reg_to_ssa.opcodes(&MOV_STORE), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F64), - rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F32), - rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), - ); - - e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0); -} - -#[inline(never)] -fn define_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. 
- let adjust_sp_down = shared.by_name("adjust_sp_down"); - let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); - let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); - let copy_nop = shared.by_name("copy_nop"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let istore16 = shared.by_name("istore16"); - let istore16_complex = shared.by_name("istore16_complex"); - let istore32 = shared.by_name("istore32"); - let istore32_complex = shared.by_name("istore32_complex"); - let istore8 = shared.by_name("istore8"); - let istore8_complex = shared.by_name("istore8_complex"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let sload16 = shared.by_name("sload16"); - let sload16_complex = shared.by_name("sload16_complex"); - let sload32 = shared.by_name("sload32"); - let sload32_complex = shared.by_name("sload32_complex"); - let sload8 = shared.by_name("sload8"); - let sload8_complex = shared.by_name("sload8_complex"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let uload16 = shared.by_name("uload16"); - let uload16_complex = shared.by_name("uload16_complex"); - let uload32 = shared.by_name("uload32"); - let uload32_complex = shared.by_name("uload32_complex"); - let uload8 = shared.by_name("uload8"); - let uload8_complex = shared.by_name("uload8_complex"); - let x86_pop = x86.by_name("x86_pop"); - let x86_push = x86.by_name("x86_push"); - - // Shorthands for recipes. - let rec_adjustsp = r.template("adjustsp"); - let rec_adjustsp_ib = r.template("adjustsp_ib"); - let rec_adjustsp_id = r.template("adjustsp_id"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fillnull = r.recipe("fillnull"); - let rec_fillSib32 = r.template("fillSib32"); - let rec_ld = r.template("ld"); - let rec_ldDisp32 = r.template("ldDisp32"); - let rec_ldDisp8 = r.template("ldDisp8"); - let rec_ldWithIndex = r.template("ldWithIndex"); - let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); - let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); - let rec_popq = r.template("popq"); - let rec_pushq = r.template("pushq"); - let rec_regfill32 = r.template("regfill32"); - let rec_regspill32 = r.template("regspill32"); - let rec_spillSib32 = r.template("spillSib32"); - let rec_st = r.template("st"); - let rec_stacknull = r.recipe("stacknull"); - let rec_stDisp32 = r.template("stDisp32"); - let rec_stDisp32_abcd = r.template("stDisp32_abcd"); - let rec_stDisp8 = r.template("stDisp8"); - let rec_stDisp8_abcd = r.template("stDisp8_abcd"); - let rec_stWithIndex = r.template("stWithIndex"); - let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); - let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); - let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); - let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); - let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); - let rec_st_abcd = r.template("st_abcd"); - - // Loads and stores. 
- let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - - for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { - e.enc_i32_i64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_r32_r64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_x86_64_instp( - uload32_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - - e.enc64_instp( - sload32_complex, - recipe.opcodes(&MOVSXD).rex().w(), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload16_complex, - recipe.opcodes(&MOVZX_WORD), - is_load_complex_length_two.clone(), - ); - e.enc_i32_i64_instp( - sload16_complex, - recipe.opcodes(&MOVSX_WORD), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload8_complex, - recipe.opcodes(&MOVZX_BYTE), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - sload8_complex, - recipe.opcodes(&MOVSX_BYTE), - is_load_complex_length_two.clone(), - ); - } - - let is_store_complex_length_three = - InstructionPredicate::new_length_equals(&*formats.store_complex, 3); - - for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { - e.enc_i32_i64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_r32_r64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore32_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_both_instp( - istore16_complex.bind(I32), - recipe.opcodes(&MOV_STORE_16), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore16_complex.bind(I64), - recipe.opcodes(&MOV_STORE_16), - is_store_complex_length_three.clone(), - ); - } - - for recipe in &[ - rec_stWithIndex_abcd, - rec_stWithIndexDisp8_abcd, - rec_stWithIndexDisp32_abcd, - ] { - e.enc_both_instp( - istore8_complex.bind(I32), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore8_complex.bind(I64), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - } - - for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { - e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); - e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); - } - - // Byte stores are more complicated because the registers they can address - // depends of the presence of a REX prefix. The st*_abcd recipes fall back to - // the corresponding st* recipes when a REX prefix is applied. - - for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { - e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - } - - e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); - - // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid - // constraining the permitted registers. - // See MIN_SPILL_SLOT_SIZE which makes this safe. 
- - e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); - for &ty in &[I8, I16] { - e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); - } - - for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { - e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); - e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); - e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); - e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); - e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); - e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); - } - - e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - - // No-op fills, created by late-stage redundant-fill removal. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); - for &ty in &[F64, F32] { - e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); - } - for &ty in &[R64, R32] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - - // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. - - e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); - for &ty in &[I8, I16] { - e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); - } - - // Push and Pop. - e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); - e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); - - e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); - e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); - - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - // into a no-op. - // The same encoding is generated for both the 64- and 32-bit architectures. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - for &ty in &[F64, F32] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - - // Adjust SP down by a dynamic value (or up, with a negative operand). - e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); - e.enc64( - adjust_sp_down.bind(I64), - rec_adjustsp.opcodes(&SUB).rex().w(), - ); - - // Adjust SP up by an immediate (or down, with a negative immediate). - e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); - e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), - ); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), - ); - - // Adjust SP down by an immediate (or up, with a negative immediate). 
- e.enc32( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), - ); - e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), - ); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), - ); -} - -#[inline(never)] -fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let bitcast = shared.by_name("bitcast"); - let copy = shared.by_name("copy"); - let regmove = shared.by_name("regmove"); - - // Shorthands for recipes. - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_rfumr = r.template("rfumr"); - - // Floating-point moves. - // movd - e.enc_both( - bitcast.bind(F32).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc_both( - bitcast.bind(I32).bind(F32), - rec_rfumr.opcodes(&MOVD_STORE_XMM), - ); - - // movq - e.enc64( - bitcast.bind(F64).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - e.enc64( - bitcast.bind(I64).bind(F64), - rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), - ); - - // movaps - e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); - e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); -} - -#[inline(never)] -fn define_fpu_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let fill = shared.by_name("fill"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - - // Shorthands for recipes. - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - - // Float loads and stores. 
- e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); - - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndex.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), - ); - - e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); - - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndex.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), - ); - - e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndex.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndex.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); - e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); - e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); - e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); - - e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); - e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); - e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); - e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); -} - -#[inline(never)] -fn define_fpu_ops( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. 
- let ceil = shared.by_name("ceil"); - let f32const = shared.by_name("f32const"); - let f64const = shared.by_name("f64const"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdemote = shared.by_name("fdemote"); - let fdiv = shared.by_name("fdiv"); - let ffcmp = shared.by_name("ffcmp"); - let floor = shared.by_name("floor"); - let fmul = shared.by_name("fmul"); - let fpromote = shared.by_name("fpromote"); - let fsub = shared.by_name("fsub"); - let nearest = shared.by_name("nearest"); - let sqrt = shared.by_name("sqrt"); - let trunc = shared.by_name("trunc"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - - // Shorthands for recipes. - let rec_f32imm_z = r.template("f32imm_z"); - let rec_f64imm_z = r.template("f64imm_z"); - let rec_fa = r.template("fa"); - let rec_fcmp = r.template("fcmp"); - let rec_fcscc = r.template("fcscc"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_furmi_rnd = r.template("furmi_rnd"); - let rec_rfurm = r.template("rfurm"); - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for - // 32-bit and 64-bit floats respectively. - let is_zero_32_bit_float = - InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm"); - e.enc32_instp( - f32const, - rec_f32imm_z.opcodes(&XORPS), - is_zero_32_bit_float.clone(), - ); - - let is_zero_64_bit_float = - InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm"); - e.enc32_instp( - f64const, - rec_f64imm_z.opcodes(&XORPD), - is_zero_64_bit_float.clone(), - ); - - e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); - e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); - - // cvtsi2ss - e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); - - // cvtsi2sd - e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); - - // cvtss2sd - e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); - - // cvtsd2ss - e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); - - // cvttss2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), - ); - - // cvttsd2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), - ); - - // Exact square roots. - e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); - e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); - - // Rounding. The recipe looks at the opcode to pick an immediate. - for inst in &[nearest, floor, ceil, trunc] { - e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); - e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); - } - - // Binary arithmetic ops. 
- e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); - e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); - - e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); - e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); - - e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); - e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); - - e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); - e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); - - e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); - e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); - - e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); - e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); - - // Comparisons. - // - // This only covers the condition codes in `supported_floatccs`, the rest are - // handled by legalization patterns. - e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); - e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); - e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); - e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); -} - -#[inline(never)] -fn define_alu( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let clz = shared.by_name("clz"); - let ctz = shared.by_name("ctz"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); - let ifcmp = shared.by_name("ifcmp"); - let ifcmp_imm = shared.by_name("ifcmp_imm"); - let ifcmp_sp = shared.by_name("ifcmp_sp"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let popcnt = shared.by_name("popcnt"); - let rotl = shared.by_name("rotl"); - let rotl_imm = shared.by_name("rotl_imm"); - let rotr = shared.by_name("rotr"); - let rotr_imm = shared.by_name("rotr_imm"); - let selectif = shared.by_name("selectif"); - let selectif_spectre_guard = shared.by_name("selectif_spectre_guard"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let trueff = shared.by_name("trueff"); - let trueif = shared.by_name("trueif"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let x86_bsf = x86.by_name("x86_bsf"); - let x86_bsr = x86.by_name("x86_bsr"); - - // Shorthands for recipes. - let rec_bsf_and_bsr = r.template("bsf_and_bsr"); - let rec_cmov = r.template("cmov"); - let rec_icscc = r.template("icscc"); - let rec_icscc_ib = r.template("icscc_ib"); - let rec_icscc_id = r.template("icscc_id"); - let rec_rcmp = r.template("rcmp"); - let rec_rcmp_ib = r.template("rcmp_ib"); - let rec_rcmp_id = r.template("rcmp_id"); - let rec_rcmp_sp = r.template("rcmp_sp"); - let rec_rc = r.template("rc"); - let rec_setf_abcd = r.template("setf_abcd"); - let rec_seti_abcd = r.template("seti_abcd"); - let rec_urm = r.template("urm"); - - // Predicates shorthands. 
- let use_popcnt = settings.predicate_by_name("use_popcnt"); - let use_lzcnt = settings.predicate_by_name("use_lzcnt"); - let use_bmi1 = settings.predicate_by_name("use_bmi1"); - - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let band_not = shared.by_name("band_not"); - let bnot = shared.by_name("bnot"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let iadd = shared.by_name("iadd"); - let iadd_ifcarry = shared.by_name("iadd_ifcarry"); - let iadd_ifcin = shared.by_name("iadd_ifcin"); - let iadd_ifcout = shared.by_name("iadd_ifcout"); - let iadd_imm = shared.by_name("iadd_imm"); - let imul = shared.by_name("imul"); - let isub = shared.by_name("isub"); - let isub_ifbin = shared.by_name("isub_ifbin"); - let isub_ifborrow = shared.by_name("isub_ifborrow"); - let isub_ifbout = shared.by_name("isub_ifbout"); - let x86_sdivmodx = x86.by_name("x86_sdivmodx"); - let x86_smulx = x86.by_name("x86_smulx"); - let x86_udivmodx = x86.by_name("x86_udivmodx"); - let x86_umulx = x86.by_name("x86_umulx"); - - let rec_div = r.template("div"); - let rec_fa = r.template("fa"); - let rec_fax = r.template("fax"); - let rec_mulx = r.template("mulx"); - let rec_r_ib = r.template("r_ib"); - let rec_r_id = r.template("r_id"); - let rec_rin = r.template("rin"); - let rec_rio = r.template("rio"); - let rec_rout = r.template("rout"); - let rec_rr = r.template("rr"); - let rec_rrx = r.template("rrx"); - let rec_ur = r.template("ur"); - - e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); - e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); - e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); - e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); - - e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); - e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); - - e.enc_i32_i64(band, rec_rr.opcodes(&AND)); - e.enc_b32_b64(band, rec_rr.opcodes(&AND)); - - // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can - // even use the single-byte immediate for 0xffff_ffXX masks. - - e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); - e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); - - e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); - e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); - e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); - e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); - - e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); - e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); - e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); - e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); - - // x86 has a bitwise not instruction NOT. - e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2)); - - // Also add a `b1` encodings for the logic instructions. - // TODO: Should this be done with 8-bit instructions? It would improve partial register - // dependencies. 
- e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); - e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); - e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); - - e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); - e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); - e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); - - e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); - e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); - - // Binary bitwise ops. - // - // The F64 version is intentionally encoded using the single-precision opcode: - // the operation is identical and the encoding is one byte shorter. - e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS)); - e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS)); - - e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS)); - e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS)); - - e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS)); - e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS)); - - // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. - e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS)); - e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS)); - - // Shifts and rotates. - // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit - // and 16-bit shifts would need explicit masking. - - for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { - // Cannot use enc_i32_i64 for this pattern because instructions require - // to bind any. - e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr)); - e.enc32( - inst.bind(I32).bind(I16), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc32( - inst.bind(I32).bind(I32), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc64( - inst.bind(I64).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - } - - e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); - e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); - e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); - e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); - e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); - - // Population count. - e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - e.enc64_isap( - popcnt.bind(I64), - rec_urm.opcodes(&POPCNT).rex().w(), - use_popcnt, - ); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - - // Count leading zero bits. - e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - - // Count trailing zero bits. 
- e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - - // Bit scan forwards and reverse - e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); - e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); - - // Comparisons - e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); - e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); - e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); - // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - - e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); - e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); - - // Convert flags to bool. - // This encodes `b1` as an 8-bit low register with the value 0 or 1. - e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - - // Conditional move (a.k.a integer select). - e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); - // A Spectre-guard integer select is exactly the same as a selectif, but - // is not associated with any other legalization rules and is not - // recognized by any optimizations, so it must arrive here unmodified - // and in its original place. - e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW)); -} - -#[inline(never)] -#[allow(clippy::cognitive_complexity)] -fn define_simd( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. 
- let avg_round = shared.by_name("avg_round"); - let bitcast = shared.by_name("bitcast"); - let bor = shared.by_name("bor"); - let bxor = shared.by_name("bxor"); - let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdiv = shared.by_name("fdiv"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let fmul = shared.by_name("fmul"); - let fsub = shared.by_name("fsub"); - let iabs = shared.by_name("iabs"); - let iadd = shared.by_name("iadd"); - let icmp = shared.by_name("icmp"); - let imul = shared.by_name("imul"); - let ishl_imm = shared.by_name("ishl_imm"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let raw_bitcast = shared.by_name("raw_bitcast"); - let regfill = shared.by_name("regfill"); - let regmove = shared.by_name("regmove"); - let regspill = shared.by_name("regspill"); - let sadd_sat = shared.by_name("sadd_sat"); - let scalar_to_vector = shared.by_name("scalar_to_vector"); - let sload8x8 = shared.by_name("sload8x8"); - let sload8x8_complex = shared.by_name("sload8x8_complex"); - let sload16x4 = shared.by_name("sload16x4"); - let sload16x4_complex = shared.by_name("sload16x4_complex"); - let sload32x2 = shared.by_name("sload32x2"); - let sload32x2_complex = shared.by_name("sload32x2_complex"); - let spill = shared.by_name("spill"); - let sqrt = shared.by_name("sqrt"); - let sshr_imm = shared.by_name("sshr_imm"); - let ssub_sat = shared.by_name("ssub_sat"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let swiden_low = shared.by_name("swiden_low"); - let uadd_sat = shared.by_name("uadd_sat"); - let uload8x8 = shared.by_name("uload8x8"); - let uload8x8_complex = shared.by_name("uload8x8_complex"); - let uload16x4 = shared.by_name("uload16x4"); - let uload16x4_complex = shared.by_name("uload16x4_complex"); - let uload32x2 = shared.by_name("uload32x2"); - let uload32x2_complex = shared.by_name("uload32x2_complex"); - let snarrow = shared.by_name("snarrow"); - let unarrow = shared.by_name("unarrow"); - let uwiden_low = shared.by_name("uwiden_low"); - let ushr_imm = shared.by_name("ushr_imm"); - let usub_sat = shared.by_name("usub_sat"); - let vconst = shared.by_name("vconst"); - let vselect = shared.by_name("vselect"); - let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_insertps = x86.by_name("x86_insertps"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - let x86_movlhps = x86.by_name("x86_movlhps"); - let x86_movsd = x86.by_name("x86_movsd"); - let x86_pblendw = x86.by_name("x86_pblendw"); - let x86_pextr = x86.by_name("x86_pextr"); - let x86_pinsr = x86.by_name("x86_pinsr"); - let x86_pmaxs = x86.by_name("x86_pmaxs"); - let x86_pmaxu = x86.by_name("x86_pmaxu"); - let x86_pmins = x86.by_name("x86_pmins"); - let x86_pminu = x86.by_name("x86_pminu"); - let x86_pmullq = x86.by_name("x86_pmullq"); - let x86_pmuludq = x86.by_name("x86_pmuludq"); - let x86_palignr = x86.by_name("x86_palignr"); - let x86_pshufb = x86.by_name("x86_pshufb"); - let x86_pshufd = x86.by_name("x86_pshufd"); - let x86_psll = x86.by_name("x86_psll"); - let x86_psra = x86.by_name("x86_psra"); - let x86_psrl = x86.by_name("x86_psrl"); - let 
x86_ptest = x86.by_name("x86_ptest"); - let x86_punpckh = x86.by_name("x86_punpckh"); - let x86_punpckl = x86.by_name("x86_punpckl"); - let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); - - // Shorthands for recipes. - let rec_blend = r.template("blend"); - let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); - let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); - let rec_f_ib = r.template("f_ib"); - let rec_fa = r.template("fa"); - let rec_fa_ib = r.template("fa_ib"); - let rec_fax = r.template("fax"); - let rec_fcmp = r.template("fcmp"); - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - let rec_furm = r.template("furm"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_icscc_fpr = r.template("icscc_fpr"); - let rec_null_fpr = r.recipe("null_fpr"); - let rec_pfcmp = r.template("pfcmp"); - let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); - let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); - let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); - let rec_stacknull = r.recipe("stacknull"); - let rec_vconst = r.template("vconst"); - let rec_vconst_optimized = r.template("vconst_optimized"); - - // Predicates shorthands. - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); - let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); - let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); - let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); - let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); - - // SIMD vector size: eventually multiple vector sizes may be supported but for now only - // SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - - // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see - // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the - // value across the register. - - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // PSHUFB, 8-bit shuffle using two XMM registers. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = x86_pshufb.bind(vector(ty, sse_vector_size)); - let template = rec_fa.opcodes(&PSHUFB); - e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd)); - } - - // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate. 
- for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let instruction = x86_pshufd.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD); - e.enc_both_inferred(instruction, template); - } - - // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be - // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB; - // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 32 => &BLENDVPS, - 64 => &BLENDVPD, - _ => &PBLENDVB, - }; - let instruction = vselect.bind(vector(ty, sse_vector_size)); - let template = rec_blend.opcodes(opcode); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // PBLENDW, select lanes using a u8 immediate. - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); - let template = rec_fa_ib.opcodes(&PBLENDW); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according - // to the Intel manual: "When the destination operand is an XMM register, the source operand is - // written to the low doubleword of the register and the register is zero-extended to 128 bits." - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size)); - if ty.is_float() { - // No need to move floats--they already live in XMM registers. - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } else { - let template = rec_frurm.opcodes(&MOVD_LOAD_XMM); - if ty.lane_bits() < 64 { - e.enc_both_inferred(instruction, template); - } else { - // No 32-bit encodings for 64-bit widths. - assert_eq!(ty.lane_bits(), 64); - e.enc64(instruction, template.rex().w()); - } - } - } - - // SIMD insertlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (opcode, isap): (&[_], _) = match ty.lane_bits() { - 8 => (&PINSRB, Some(use_sse41_simd)), - 16 => (&PINSRW, None), - 32 | 64 => (&PINSR, Some(use_sse41_simd)), - _ => panic!("invalid size for SIMD insertlane"), - }; - - let instruction = x86_pinsr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_r.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, isap); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), isap); - } - } - - // For legalizing insertlane with floats, INSERTPS from SSE4.1. - { - let instruction = x86_insertps.bind(vector(F32, sse_vector_size)); - let template = rec_fa_ib.opcodes(&INSERTPS); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // For legalizing insertlane with floats, MOVSD from SSE2. - { - let instruction = x86_movsd.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVSD_LOAD); - e.enc_both_inferred(instruction, template); // from SSE2 - } - - // For legalizing insertlane with floats, MOVLHPS from SSE. 
- { - let instruction = x86_movlhps.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVLHPS); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD extractlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 8 => &PEXTRB, - 16 => &PEXTRW, - 32 | 64 => &PEXTR, - _ => panic!("invalid size for SIMD extractlane"), - }; - - let instruction = x86_pextr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_gpr.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); - } - } - - // SIMD packing/unpacking - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (high, low) = match ty.lane_bits() { - 8 => (&PUNPCKHBW, &PUNPCKLBW), - 16 => (&PUNPCKHWD, &PUNPCKLWD), - 32 => (&PUNPCKHDQ, &PUNPCKLDQ), - 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ), - _ => panic!("invalid size for SIMD packing/unpacking"), - }; - - e.enc_both_inferred( - x86_punpckh.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(high), - ); - e.enc_both_inferred( - x86_punpckl.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(low), - ); - } - - // SIMD narrow/widen - for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { - let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); - } - for (ty, opcodes, isap) in &[ - (I16, &PACKUSWB[..], None), - (I32, &PACKUSDW[..], Some(use_sse41_simd)), - ] { - let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); - } - for (ty, swiden_opcode, uwiden_opcode) in &[ - (I8, &PMOVSXBW[..], &PMOVZXBW[..]), - (I16, &PMOVSXWD[..], &PMOVZXWD[..]), - ] { - let isap = Some(use_sse41_simd); - let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); - let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); - } - for ty in &[I8, I16, I32, I64] { - e.enc_both_inferred_maybe_isap( - x86_palignr.bind(vector(*ty, sse_vector_size)), - rec_fa_ib.opcodes(&PALIGNR[..]), - Some(use_ssse3_simd), - ); - } - - // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). - for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { - for to_type in - ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) - { - let instruction = raw_bitcast - .bind(vector(to_type, sse_vector_size)) - .bind(vector(from_type, sse_vector_size)); - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } - } - - // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an - // XMM register. 
- for float_type in &[F32, F64] { - for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { - e.enc_32_64_rec( - raw_bitcast - .bind(vector(lane_type, sse_vector_size)) - .bind(*float_type), - rec_null_fpr, - 0, - ); - e.enc_32_64_rec( - raw_bitcast - .bind(*float_type) - .bind(vector(lane_type, sse_vector_size)), - rec_null_fpr, - 0, - ); - } - } - - // SIMD conversions - { - let fcvt_from_sint_32 = fcvt_from_sint - .bind(vector(F32, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); - - e.enc_32_64_maybe_isap( - x86_vcvtudq2ps, - rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), - Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F - ); - - e.enc_both_inferred( - x86_cvtt2si - .bind(vector(I32, sse_vector_size)) - .bind(vector(F32, sse_vector_size)), - rec_furm.opcodes(&CVTTPS2DQ), - ); - } - - // SIMD vconst for special cases (all zeroes, all ones) - // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this - // encoding first - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - - let is_zero_128bit = - InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex(); - e.enc_32_64_func(instruction.clone(), template, |builder| { - builder.inst_predicate(is_zero_128bit) - }); - - let is_ones_128bit = - InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex(); - e.enc_32_64_func(instruction, template, |builder| { - builder.inst_predicate(is_ones_128bit) - }); - } - - // SIMD vconst using MOVUPS - // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have - // to guarantee that the constants are aligned when emitted and there is currently no mechanism - // for that; alternately, constants could be loaded into XMM registers using a sequence like: - // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored - // in memory) but some performance measurements are needed. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - let template = rec_vconst.opcodes(&MOVUPS_LOAD); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of - // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have - // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124). - // Also, it would be ideal to infer REX prefixes for all of these instructions but for the - // time being only instructions with common recipes have `infer_rex()` support. 
- for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // Store - let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); - - // Store complex - let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndex.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex, - rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE), - ); - - // Load - let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); - e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); - e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); - - // Load complex - let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndex.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex, - rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD), - ); - - // Spill - let bound_spill = spill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); - let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); - - // Fill - let bound_fill = fill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); - let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); - let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); - - // Regmove - let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); - - // Copy - let bound_copy = copy.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); - let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD)); - let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); - } - - // SIMD load extend - for (inst, opcodes) in &[ - (uload8x8, &PMOVZXBW), - (uload16x4, &PMOVZXWD), - (uload32x2, &PMOVZXDQ), - (sload8x8, &PMOVSXBW), - (sload16x4, &PMOVSXWD), - (sload32x2, &PMOVSXDQ), - ] { - let isap = Some(use_sse41_simd); - for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] { - let inst = *inst; - let template = recipe.opcodes(*opcodes); - e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap); - e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap); - } - } - - // SIMD load extend (complex addressing) - let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - for (inst, opcodes) in &[ - (uload8x8_complex, &PMOVZXBW), - (uload16x4_complex, &PMOVZXWD), - (uload32x2_complex, &PMOVZXDQ), - (sload8x8_complex, &PMOVSXBW), - (sload16x4_complex, &PMOVSXWD), - (sload32x2_complex, &PMOVSXDQ), - ] 
{ - for recipe in &[ - rec_fldWithIndex, - rec_fldWithIndexDisp8, - rec_fldWithIndexDisp32, - ] { - let template = recipe.opcodes(*opcodes); - let predicate = |encoding: EncodingBuilder| { - encoding - .isa_predicate(use_sse41_simd) - .inst_predicate(is_load_complex_length_two.clone()) - }; - e.enc32_func(inst.clone(), template.clone(), predicate); - // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64. - e.enc64_func(inst.clone(), template.rex(), predicate); - e.enc64_func(inst.clone(), template, predicate); - } - } - - // SIMD integer addition - for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { - let iadd = iadd.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating addition - e.enc_both_inferred( - sadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDSB), - ); - e.enc_both_inferred( - sadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDSW), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDUSB), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDUSW), - ); - - // SIMD integer subtraction - let isub = shared.by_name("isub"); - for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { - let isub = isub.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating subtraction - e.enc_both_inferred( - ssub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBSB), - ); - e.enc_both_inferred( - ssub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBSW), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBUSB), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBUSW), - ); - - // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16 - // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD. - for (ty, opcodes, isap) in &[ - (I16, &PMULLW[..], None), - (I32, &PMULLD[..], Some(use_sse41_simd)), - ] { - let imul = imul.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); - } - - // SIMD multiplication with lane expansion. - e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ)); - - // SIMD multiplication and add adjacent pairs, from SSE2. - e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD)); - - // SIMD integer multiplication for I64x2 using a AVX512. - { - e.enc_32_64_maybe_isap( - x86_pmullq, - rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(), - Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL - ); - } - - // SIMD integer average with rounding. - for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { - let avgr = avg_round.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); - } - - // SIMD integer absolute value. 
- for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { - let iabs = iabs.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); - } - - // SIMD logical operations - let band = shared.by_name("band"); - let band_not = shared.by_name("band_not"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // and - let band = band.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band, rec_fa.opcodes(&PAND)); - - // and not (note flipped recipe operands to match band_not order) - let band_not = band_not.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN)); - - // or - let bor = bor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bor, rec_fa.opcodes(&POR)); - - // xor - let bxor = bxor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR)); - - // ptest - let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); - } - - // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement - // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an - // I128x1 but restrictions on the type builder prevent this; the general idea here is that - // the upper bits are all zeroed and do not form parts of any separate lane. See - // https://github.com/bytecodealliance/wasmtime/issues/1140. - e.enc_both_inferred( - bitcast.bind(vector(I64, sse_vector_size)).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc64( - bitcast.bind(vector(I64, sse_vector_size)).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - - // SIMD shift left - for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] { - let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (logical) - for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] { - let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (arithmetic) - for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] { - let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes)); - } - - // SIMD immediate shift - for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] { - let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6)); - - let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2)); - - // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set. 
- if *ty != I64 { - let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4)); - } - } - - // SIMD integer comparisons - { - use IntCC::*; - for (ty, cc, opcodes, isa_predicate) in &[ - (I8, Equal, &PCMPEQB[..], None), - (I16, Equal, &PCMPEQW[..], None), - (I32, Equal, &PCMPEQD[..], None), - (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)), - (I8, SignedGreaterThan, &PCMPGTB[..], None), - (I16, SignedGreaterThan, &PCMPGTW[..], None), - (I32, SignedGreaterThan, &PCMPGTD[..], None), - (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)), - ] { - let instruction = icmp - .bind(Immediate::IntCC(*cc)) - .bind(vector(*ty, sse_vector_size)); - let template = rec_icscc_fpr.opcodes(opcodes); - e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate); - } - } - - // SIMD min/max - for (ty, inst, opcodes, isa_predicate) in &[ - (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), - (I16, x86_pmaxs, &PMAXSW[..], None), - (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), - (I8, x86_pmaxu, &PMAXUB[..], None), - (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), - (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), - (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), - (I16, x86_pmins, &PMINSW[..], None), - (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), - (I8, x86_pminu, &PMINUB[..], None), - (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), - (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate); - } - - // SIMD float comparisons - e.enc_both_inferred( - fcmp.bind(vector(F32, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPS), - ); - e.enc_both_inferred( - fcmp.bind(vector(F64, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPD), - ); - - // SIMD float arithmetic - for (ty, inst, opcodes) in &[ - (F32, fadd, &ADDPS[..]), - (F64, fadd, &ADDPD[..]), - (F32, fsub, &SUBPS[..]), - (F64, fsub, &SUBPD[..]), - (F32, fmul, &MULPS[..]), - (F64, fmul, &MULPD[..]), - (F32, fdiv, &DIVPS[..]), - (F64, fdiv, &DIVPD[..]), - (F32, x86_fmin, &MINPS[..]), - (F64, x86_fmin, &MINPD[..]), - (F32, x86_fmax, &MAXPS[..]), - (F64, x86_fmax, &MAXPD[..]), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_fa.opcodes(opcodes)); - } - for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_furm.opcodes(opcodes)); - } -} - -#[inline(never)] -fn define_entity_ref( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let const_addr = shared.by_name("const_addr"); - let func_addr = shared.by_name("func_addr"); - let stack_addr = shared.by_name("stack_addr"); - let symbol_value = shared.by_name("symbol_value"); - - // Shorthands for recipes. 
- let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); - let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); - let rec_fnaddr4 = r.template("fnaddr4"); - let rec_fnaddr8 = r.template("fnaddr8"); - let rec_const_addr = r.template("const_addr"); - let rec_got_fnaddr8 = r.template("got_fnaddr8"); - let rec_got_gvaddr8 = r.template("got_gvaddr8"); - let rec_gvaddr4 = r.template("gvaddr4"); - let rec_gvaddr8 = r.template("gvaddr8"); - let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); - let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); - let rec_spaddr_id = r.template("spaddr_id"); - - // Predicates shorthands. - let all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - let is_pic = settings.predicate_by_name("is_pic"); - let not_all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let not_is_pic = settings.predicate_by_name("not_is_pic"); - - // Function addresses. - - // Non-PIC, all-ones funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_fnaddr4.opcodes(&MOV_IMM), - not_all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), - not_all_ones_funcaddrs_and_not_is_pic, - ); - - // Non-PIC, all-zeros funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_allones_fnaddr4.opcodes(&MOV_IMM), - all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), - all_ones_funcaddrs_and_not_is_pic, - ); - - // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. - let is_colocated_func = - InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); - e.enc64_instp( - func_addr.bind(I64), - rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), - is_colocated_func, - ); - - // 64-bit, non-colocated, PIC. - e.enc64_isap( - func_addr.bind(I64), - rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Global addresses. - - // Non-PIC. - e.enc32_isap( - symbol_value.bind(I32), - rec_gvaddr4.opcodes(&MOV_IMM), - not_is_pic, - ); - e.enc64_isap( - symbol_value.bind(I64), - rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), - not_is_pic, - ); - - // PIC, colocated. - e.enc64_func( - symbol_value.bind(I64), - rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), - |encoding| { - encoding - .isa_predicate(is_pic) - .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) - }, - ); - - // PIC, non-colocated. - e.enc64_isap( - symbol_value.bind(I64), - rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Stack addresses. - // - // TODO: Add encoding rules for stack_load and stack_store, so that they - // don't get legalized to stack_addr + load/store. - e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w()); - e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA)); - - // Constant addresses (PIC). - e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w()); - e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA)); -} - -/// Control flow opcodes. -#[inline(never)] -fn define_control_flow( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. 
- let brff = shared.by_name("brff"); - let brif = shared.by_name("brif"); - let brnz = shared.by_name("brnz"); - let brz = shared.by_name("brz"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let debugtrap = shared.by_name("debugtrap"); - let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); - let jump = shared.by_name("jump"); - let jump_table_base = shared.by_name("jump_table_base"); - let jump_table_entry = shared.by_name("jump_table_entry"); - let return_ = shared.by_name("return"); - let trap = shared.by_name("trap"); - let trapff = shared.by_name("trapff"); - let trapif = shared.by_name("trapif"); - let resumable_trap = shared.by_name("resumable_trap"); - - // Shorthands for recipes. - let rec_brfb = r.template("brfb"); - let rec_brfd = r.template("brfd"); - let rec_brib = r.template("brib"); - let rec_brid = r.template("brid"); - let rec_call_id = r.template("call_id"); - let rec_call_plt_id = r.template("call_plt_id"); - let rec_call_r = r.template("call_r"); - let rec_debugtrap = r.recipe("debugtrap"); - let rec_indirect_jmp = r.template("indirect_jmp"); - let rec_jmpb = r.template("jmpb"); - let rec_jmpd = r.template("jmpd"); - let rec_jt_base = r.template("jt_base"); - let rec_jt_entry = r.template("jt_entry"); - let rec_ret = r.template("ret"); - let rec_t8jccb_abcd = r.template("t8jccb_abcd"); - let rec_t8jccd_abcd = r.template("t8jccd_abcd"); - let rec_t8jccd_long = r.template("t8jccd_long"); - let rec_tjccb = r.template("tjccb"); - let rec_tjccd = r.template("tjccd"); - let rec_trap = r.template("trap"); - let rec_trapif = r.recipe("trapif"); - let rec_trapff = r.recipe("trapff"); - - // Predicates shorthands. - let is_pic = settings.predicate_by_name("is_pic"); - - // Call/return - - // 32-bit, both PIC and non-PIC. - e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); - - // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. - let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); - e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); - - // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC - // is currently using the large model, which requires calls be lowered to - // func_addr+call_indirect. - e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); - - e.enc32( - call_indirect.bind(I32), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - - e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); - e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); - - // Branches. - e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - - e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Not all float condition codes are legal, see `supported_floatccs`. - e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Note that the tjccd opcode will be prefixed with 0x0f. 
- e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); - - // Branch on a b1 value in a register only looks at the low 8 bits. See also - // bint encodings below. - // - // Start with the worst-case encoding for X86_32 only. The register allocator - // can't handle a branch with an ABCD-constrained operand. - e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); - e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); - - e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); - e.enc_both( - brnz.bind(B1), - rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), - ); - e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); - - // Jump tables. - e.enc64( - jump_table_entry.bind(I64), - rec_jt_entry.opcodes(&MOVSXD).rex().w(), - ); - e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); - - e.enc64( - jump_table_base.bind(I64), - rec_jt_base.opcodes(&LEA).rex().w(), - ); - e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); - - e.enc_x86_64( - indirect_jump_table_br.bind(I64), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - e.enc32( - indirect_jump_table_br.bind(I32), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - - // Trap as ud2 - e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - - // Debug trap as int3 - e.enc32_rec(debugtrap, rec_debugtrap, 0); - e.enc64_rec(debugtrap, rec_debugtrap, 0); - - e.enc32_rec(trapif, rec_trapif, 0); - e.enc64_rec(trapif, rec_trapif, 0); - e.enc32_rec(trapff, rec_trapff, 0); - e.enc64_rec(trapff, rec_trapff, 0); -} - -/// Reference type instructions. -#[inline(never)] -fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - let is_null = shared.by_name("is_null"); - let is_invalid = shared.by_name("is_invalid"); - let null = shared.by_name("null"); - let safepoint = shared.by_name("safepoint"); - - let rec_is_zero = r.template("is_zero"); - let rec_is_invalid = r.template("is_invalid"); - let rec_pu_id_ref = r.template("pu_id_ref"); - let rec_safepoint = r.recipe("safepoint"); - - // Null references implemented as iconst 0. - e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); - - e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); - e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); - - // is_null, implemented by testing whether the value is 0. - e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); - - // is_invalid, implemented by testing whether the value is -1. - e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); - - // safepoint instruction calls sink, no actual encoding. - e.enc32_rec(safepoint, rec_safepoint, 0); - e.enc64_rec(safepoint, rec_safepoint, 0); -} - -#[allow(clippy::cognitive_complexity)] -pub(crate) fn define( - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) -> PerCpuModeEncodings { - // Definitions. 
- let mut e = PerCpuModeEncodings::new(); - - define_moves(&mut e, shared_defs, r); - define_memory(&mut e, shared_defs, x86, r); - define_fpu_moves(&mut e, shared_defs, r); - define_fpu_memory(&mut e, shared_defs, r); - define_fpu_ops(&mut e, shared_defs, settings, x86, r); - define_alu(&mut e, shared_defs, settings, x86, r); - define_simd(&mut e, shared_defs, settings, x86, r); - define_entity_ref(&mut e, shared_defs, settings, r); - define_control_flow(&mut e, shared_defs, settings, r); - define_reftypes(&mut e, shared_defs, r); - - let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); - let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); - - let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); - let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); - - e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); - e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); - - e -} diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs deleted file mode 100644 index 7acd2e2c50..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ /dev/null @@ -1,723 +0,0 @@ -#![allow(non_snake_case)] - -use crate::cdsl::instructions::{ - AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, -}; -use crate::cdsl::operands::Operand; -use crate::cdsl::types::ValueType; -use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; -use crate::shared::entities::EntityRefs; -use crate::shared::formats::Formats; -use crate::shared::immediates::Immediates; -use crate::shared::types; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define( - mut all_instructions: &mut AllInstructions, - formats: &Formats, - immediates: &Immediates, - entities: &EntityRefs, -) -> InstructionGroup { - let mut ig = InstructionGroupBuilder::new(&mut all_instructions); - - let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); - - let iWord = &TypeVar::new( - "iWord", - "A scalar integer machine word", - TypeSetBuilder::new().ints(32..64).build(), - ); - let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); - let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); - let d = &Operand::new("d", iWord).with_doc("Denominator"); - let q = &Operand::new("q", iWord).with_doc("Quotient"); - let r = &Operand::new("r", iWord).with_doc("Remainder"); - - ig.push( - Inst::new( - "x86_udivmodx", - r#" - Extended unsigned division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as an unsigned number and divide by the unsigned - denominator `d`. Trap when `d` is zero or if the quotient is larger - than the range of the output. - - Return both quotient and remainder. - "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - ig.push( - Inst::new( - "x86_sdivmodx", - r#" - Extended signed division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as a signed number and divide by the signed - denominator `d`. Trap when `d` is zero or if the quotient is outside - the range of the output. - - Return both quotient and remainder. 
- "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - let argL = &Operand::new("argL", iWord); - let argR = &Operand::new("argR", iWord); - let resLo = &Operand::new("resLo", iWord); - let resHi = &Operand::new("resHi", iWord); - - ig.push( - Inst::new( - "x86_umulx", - r#" - Unsigned integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - ig.push( - Inst::new( - "x86_smulx", - r#" - Signed integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - let Float = &TypeVar::new( - "Float", - "A scalar or vector floating point number", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .build(), - ); - let IntTo = &TypeVar::new( - "IntTo", - "An integer type with the same number of lanes", - TypeSetBuilder::new() - .ints(32..64) - .simd_lanes(Interval::All) - .build(), - ); - let x = &Operand::new("x", Float); - let a = &Operand::new("a", IntTo); - - ig.push( - Inst::new( - "x86_cvtt2si", - r#" - Convert with truncation floating point to signed integer. - - The source floating point operand is converted to a signed integer by - rounding towards zero. If the result can't be represented in the output - type, returns the smallest signed value the output type can represent. - - This instruction does not trap. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let f32x4 = &TypeVar::new( - "f32x4", - "A floating point number", - TypeSetBuilder::new() - .floats(32..32) - .simd_lanes(4..4) - .build(), - ); - let i32x4 = &TypeVar::new( - "i32x4", - "An integer type with the same number of lanes", - TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), - ); - let x = &Operand::new("x", i32x4); - let a = &Operand::new("a", f32x4); - - ig.push( - Inst::new( - "x86_vcvtudq2ps", - r#" - Convert unsigned integer to floating point. - - Convert packed doubleword unsigned integers to packed single-precision floating-point - values. This instruction does not trap. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", Float); - let a = &Operand::new("a", Float); - let y = &Operand::new("y", Float); - - ig.push( - Inst::new( - "x86_fmin", - r#" - Floating point minimum with x86 semantics. - - This is equivalent to the C ternary operator `x < y ? x : y` which - differs from `fmin` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as LT, `y` is returned unchanged, - even if it is a signalling NaN. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_fmax", - r#" - Floating point maximum with x86 semantics. - - This is equivalent to the C ternary operator `x > y ? x : y` which - differs from `fmax` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as GT, `y` is returned unchanged, - even if it is a signalling NaN. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", iWord); - - ig.push( - Inst::new( - "x86_push", - r#" - Pushes a value onto the stack. - - Decrements the stack pointer and stores the specified value on to the top. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .other_side_effects(true) - .can_store(true), - ); - - ig.push( - Inst::new( - "x86_pop", - r#" - Pops a value from the stack. - - Loads a value from the top of the stack and then increments the stack - pointer. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.nullary, - ) - .operands_out(vec![x]) - .other_side_effects(true) - .can_load(true), - ); - - let y = &Operand::new("y", iWord); - let rflags = &Operand::new("rflags", iflags); - - ig.push( - Inst::new( - "x86_bsr", - r#" - Bit Scan Reverse -- returns the bit-index of the most significant 1 - in the word. Result is undefined if the argument is zero. However, it - sets the Z flag depending on the argument, so it is at least easy to - detect and handle that case. - - This is polymorphic in i32 and i64. It is implemented for both i64 and - i32 in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - ig.push( - Inst::new( - "x86_bsf", - r#" - Bit Scan Forwards -- returns the bit-index of the least significant 1 - in the word. Is otherwise identical to 'bsr', just above. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - let uimm8 = &immediates.uimm8; - let TxN = &TypeVar::new( - "TxN", - "A SIMD vector type", - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); - - ig.push( - Inst::new( - "x86_pshufd", - r#" - Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended - register and re-orders the data according to the passed immediate byte. - "#, - &formats.binary_imm8, - ) - .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pshufb", - r#" - Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle - mask from either memory or another extended register - "#, - &formats.binary, - ) - .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); - ig.push( - Inst::new( - "x86_pblendw", - r#" - Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a - lane in ``b``: if the bit is set, the lane is copied into ``a``. 
- "#, - &formats.ternary_imm8, - ) - .operands_in(vec![a, b, mask]) - .operands_out(vec![a]), - ); - - let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); - let x = &Operand::new("x", TxN); - let a = &Operand::new("a", &TxN.lane_of()); - - ig.push( - Inst::new( - "x86_pextr", - r#" - Extract lane ``Idx`` from ``x``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - "#, - &formats.binary_imm8, - ) - .operands_in(vec![x, Idx]) - .operands_out(vec![a]), - ); - - let IBxN = &TypeVar::new( - "IBxN", - "A SIMD vector type containing only booleans and integers", - TypeSetBuilder::new() - .ints(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", IBxN); - let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", IBxN); - - ig.push( - Inst::new( - "x86_pinsr", - r#" - Insert ``y`` into ``x`` at lane ``Idx``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let FxN = &TypeVar::new( - "FxN", - "A SIMD vector type containing floats", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_insertps", - r#" - Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is - extracted from and which it is inserted to. This is similar to x86_pinsr but inserts - floats, which are already stored in an XMM register. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let a = &Operand::new("a", TxN); - - ig.push( - Inst::new( - "x86_punpckh", - r#" - Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane - ordering). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_punpckl", - r#" - Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane - ordering). 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", FxN); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_movsd", - r#" - Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_movlhps", - r#" - Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let IxN = &TypeVar::new( - "IxN", - "A SIMD vector type containing integers", - TypeSetBuilder::new() - .ints(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let I128 = &TypeVar::new( - "I128", - "A SIMD vector type containing one large integer (due to Cranelift type constraints, \ - this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \ - upper lane is concatenated with the lower lane to form the integer)", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); - let y = &Operand::new("y", I128).with_doc("Number of bits to shift"); - let a = &Operand::new("a", IxN); - - ig.push( - Inst::new( - "x86_psll", - r#" - Shift Packed Data Left Logical -- This implements the behavior of the shared instruction - ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* - family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psrl", - r#" - Shift Packed Data Right Logical -- This implements the behavior of the shared instruction - ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* - family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psra", - r#" - Shift Packed Data Right Arithmetic -- This implements the behavior of the shared - instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by - the PSRA* family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let I64x2 = &TypeVar::new( - "I64x2", - "A SIMD vector type containing two 64-bit integers", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", I64x2); - let y = &Operand::new("y", I64x2); - let a = &Operand::new("a", I64x2); - ig.push( - Inst::new( - "x86_pmullq", - r#" - Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with - lane-wise wrapping if the result overflows. This instruction is necessary to add distinct - encodings for CPUs with newer vector features. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmuludq", - r#" - Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2 - unsigned integers and receive a 64x2 result. This instruction avoids the need for handling - overflow as in `x86_pmullq`. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let f = &Operand::new("f", iflags); - ig.push( - Inst::new( - "x86_ptest", - r#" - Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the - bitwise AND of the first source operand (first operand) and the second source operand - (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise - AND of the second source operand (second operand) and the logical NOT of the destination - operand (first operand). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![f]), - ); - - let x = &Operand::new("x", IxN); - let y = &Operand::new("y", IxN); - let a = &Operand::new("a", IxN); - ig.push( - Inst::new( - "x86_pmaxs", - r#" - Maximum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmaxu", - r#" - Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmins", - r#" - Minimum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the minimum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pminu", - r#" - Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the minimum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let c = &Operand::new("c", uimm8) - .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); - ig.push( - Inst::new( - "x86_palignr", - r#" - Concatenate destination and source operands, extracting a byte-aligned result shifted to - the right by `c`. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, c]) - .operands_out(vec![a]), - ); - - let i64_t = &TypeVar::new( - "i64_t", - "A scalar 64bit integer", - TypeSetBuilder::new().ints(64..64).build(), - ); - - let GV = &Operand::new("GV", &entities.global_value); - let addr = &Operand::new("addr", i64_t); - - ig.push( - Inst::new( - "x86_elf_tls_get_addr", - r#" - Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // This is a bit overly broad to mark as clobbering *all* the registers, because it should - // only preserve caller-saved registers. There's no way to indicate this to register - // allocation yet, though, so mark as clobbering all registers instead. - .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - ig.push( - Inst::new( - "x86_macho_tls_get_addr", - r#" - Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // See above comment for x86_elf_tls_get_addr. 
- .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - - ig.build() -} diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs deleted file mode 100644 index de78c3b3b7..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ /dev/null @@ -1,827 +0,0 @@ -use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; -use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::cdsl::xform::TransformGroupBuilder; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::Definitions as SharedDefinitions; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { - let mut expand = TransformGroupBuilder::new( - "x86_expand", - r#" - Legalize instructions by expansion. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("expand_flags").id); - - let mut narrow = TransformGroupBuilder::new( - "x86_narrow", - r#" - Legalize instructions by narrowing. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("narrow_flags").id); - - let mut narrow_avx = TransformGroupBuilder::new( - "x86_narrow_avx", - r#" - Legalize instructions by narrowing with CPU feature checks. - - This special case converts using x86 AVX instructions where available."#, - ) - .isa("x86"); - // We cannot chain with the x86_narrow group until this group is built, see bottom of this - // function for where this is chained. - - let mut widen = TransformGroupBuilder::new( - "x86_widen", - r#" - Legalize instructions by widening. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("widen").id); - - // List of instructions. 
- let insts = &shared.instructions; - let band = insts.by_name("band"); - let bor = insts.by_name("bor"); - let clz = insts.by_name("clz"); - let ctz = insts.by_name("ctz"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint = insts.by_name("fcvt_to_sint"); - let fcvt_to_uint = insts.by_name("fcvt_to_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - let fmin = insts.by_name("fmin"); - let iadd = insts.by_name("iadd"); - let iconst = insts.by_name("iconst"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let isub = insts.by_name("isub"); - let ishl = insts.by_name("ishl"); - let ireduce = insts.by_name("ireduce"); - let popcnt = insts.by_name("popcnt"); - let sdiv = insts.by_name("sdiv"); - let selectif = insts.by_name("selectif"); - let smulhi = insts.by_name("smulhi"); - let srem = insts.by_name("srem"); - let tls_value = insts.by_name("tls_value"); - let udiv = insts.by_name("udiv"); - let umulhi = insts.by_name("umulhi"); - let ushr = insts.by_name("ushr"); - let ushr_imm = insts.by_name("ushr_imm"); - let urem = insts.by_name("urem"); - - let x86_bsf = x86_instructions.by_name("x86_bsf"); - let x86_bsr = x86_instructions.by_name("x86_bsr"); - let x86_umulx = x86_instructions.by_name("x86_umulx"); - let x86_smulx = x86_instructions.by_name("x86_smulx"); - - let imm = &shared.imm; - - // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce - // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is - // not encodable. - let a = var("a"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - for &ty in &[I8, I16, I32] { - let ishl_by_i64 = ishl.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ishl_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - for &ty in &[I8, I16, I32] { - let ushr_by_i64 = ushr.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ushr_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - // Division and remainder. - // - // The srem expansion requires custom code because srem INT_MIN, -1 is not - // allowed to trap. The other ops need to check avoid_div_traps. - expand.custom_legalize(sdiv, "expand_sdivrem"); - expand.custom_legalize(srem, "expand_sdivrem"); - expand.custom_legalize(udiv, "expand_udivrem"); - expand.custom_legalize(urem, "expand_udivrem"); - - // Double length (widening) multiplication. - let a = var("a"); - let x = var("x"); - let y = var("y"); - let a1 = var("a1"); - let a2 = var("a2"); - let res_lo = var("res_lo"); - let res_hi = var("res_hi"); - - expand.legalize( - def!(res_hi = umulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_umulx(x, y))], - ); - - expand.legalize( - def!(res_hi = smulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_smulx(x, y))], - ); - - // Floating point condition codes. - // - // The 8 condition codes in `supported_floatccs` are directly supported by a - // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization - // patterns. 
- - let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); - let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); - let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); - let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); - let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); - let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); - - // Equality needs an explicit `ord` test which checks the parity bit. - expand.legalize( - def!(a = fcmp(floatcc_eq, x, y)), - vec![ - def!(a1 = fcmp(floatcc_ord, x, y)), - def!(a2 = fcmp(floatcc_ueq, x, y)), - def!(a = band(a1, a2)), - ], - ); - expand.legalize( - def!(a = fcmp(floatcc_ne, x, y)), - vec![ - def!(a1 = fcmp(floatcc_uno, x, y)), - def!(a2 = fcmp(floatcc_one, x, y)), - def!(a = bor(a1, a2)), - ], - ); - - let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); - let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); - let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); - let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); - let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); - let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); - let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); - let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); - - // Inequalities that need to be reversed. - for &(cc, rev_cc) in &[ - (floatcc_lt, floatcc_gt), - (floatcc_le, floatcc_ge), - (floatcc_ugt, floatcc_ult), - (floatcc_uge, floatcc_ule), - ] { - expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); - } - - // We need to modify the CFG for min/max legalization. - expand.custom_legalize(fmin, "expand_minmax"); - expand.custom_legalize(fmax, "expand_minmax"); - - // Conversions from unsigned need special handling. - expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); - // Conversions from float to int can trap and modify the control flow graph. 
- expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); - expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); - expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); - expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); - - // Count leading and trailing zeroes, for baseline x86_64 - let c_minus_one = var("c_minus_one"); - let c_thirty_one = var("c_thirty_one"); - let c_thirty_two = var("c_thirty_two"); - let c_sixty_three = var("c_sixty_three"); - let c_sixty_four = var("c_sixty_four"); - let index1 = var("index1"); - let r2flags = var("r2flags"); - let index2 = var("index2"); - - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - let imm64_minus_one = Literal::constant(&imm.imm64, -1); - let imm64_63 = Literal::constant(&imm.imm64, 63); - expand.legalize( - def!(a = clz.I64(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_sixty_three = iconst(imm64_63)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_sixty_three, index2)), - ], - ); - - let imm64_31 = Literal::constant(&imm.imm64, 31); - expand.legalize( - def!(a = clz.I32(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_thirty_one = iconst(imm64_31)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_thirty_one, index2)), - ], - ); - - let imm64_64 = Literal::constant(&imm.imm64, 64); - expand.legalize( - def!(a = ctz.I64(x)), - vec![ - def!(c_sixty_four = iconst(imm64_64)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), - ], - ); - - let imm64_32 = Literal::constant(&imm.imm64, 32); - expand.legalize( - def!(a = ctz.I32(x)), - vec![ - def!(c_thirty_two = iconst(imm64_32)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), - ], - ); - - // Population count for baseline x86_64 - let x = var("x"); - let r = var("r"); - - let qv3 = var("qv3"); - let qv4 = var("qv4"); - let qv5 = var("qv5"); - let qv6 = var("qv6"); - let qv7 = var("qv7"); - let qv8 = var("qv8"); - let qv9 = var("qv9"); - let qv10 = var("qv10"); - let qv11 = var("qv11"); - let qv12 = var("qv12"); - let qv13 = var("qv13"); - let qv14 = var("qv14"); - let qv15 = var("qv15"); - let qc77 = var("qc77"); - #[allow(non_snake_case)] - let qc0F = var("qc0F"); - let qc01 = var("qc01"); - - let imm64_1 = Literal::constant(&imm.imm64, 1); - let imm64_4 = Literal::constant(&imm.imm64, 4); - expand.legalize( - def!(r = popcnt.I64(x)), - vec![ - def!(qv3 = ushr_imm(x, imm64_1)), - def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), - def!(qv4 = band(qv3, qc77)), - def!(qv5 = isub(x, qv4)), - def!(qv6 = ushr_imm(qv4, imm64_1)), - def!(qv7 = band(qv6, qc77)), - def!(qv8 = isub(qv5, qv7)), - def!(qv9 = ushr_imm(qv7, imm64_1)), - def!(qv10 = band(qv9, qc77)), - def!(qv11 = isub(qv8, qv10)), - def!(qv12 = ushr_imm(qv11, imm64_4)), - def!(qv13 = iadd(qv11, qv12)), - def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), - def!(qv14 = band(qv13, qc0F)), - def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), - def!(qv15 = imul(qv14, qc01)), - def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), - ], - ); - - let lv3 = var("lv3"); - let lv4 = var("lv4"); - let lv5 = var("lv5"); - let lv6 = var("lv6"); - let lv7 = var("lv7"); - let lv8 = var("lv8"); - let lv9 = var("lv9"); - 
let lv10 = var("lv10"); - let lv11 = var("lv11"); - let lv12 = var("lv12"); - let lv13 = var("lv13"); - let lv14 = var("lv14"); - let lv15 = var("lv15"); - let lc77 = var("lc77"); - #[allow(non_snake_case)] - let lc0F = var("lc0F"); - let lc01 = var("lc01"); - - expand.legalize( - def!(r = popcnt.I32(x)), - vec![ - def!(lv3 = ushr_imm(x, imm64_1)), - def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), - def!(lv4 = band(lv3, lc77)), - def!(lv5 = isub(x, lv4)), - def!(lv6 = ushr_imm(lv4, imm64_1)), - def!(lv7 = band(lv6, lc77)), - def!(lv8 = isub(lv5, lv7)), - def!(lv9 = ushr_imm(lv7, imm64_1)), - def!(lv10 = band(lv9, lc77)), - def!(lv11 = isub(lv8, lv10)), - def!(lv12 = ushr_imm(lv11, imm64_4)), - def!(lv13 = iadd(lv11, lv12)), - def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), - def!(lv14 = band(lv13, lc0F)), - def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), - def!(lv15 = imul(lv14, lc01)), - def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), - ], - ); - - expand.custom_legalize(ineg, "convert_ineg"); - expand.custom_legalize(tls_value, "expand_tls_value"); - widen.custom_legalize(ineg, "convert_ineg"); - - // To reduce compilation times, separate out large blocks of legalizations by theme. - define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx); - - expand.build_and_add_to(&mut shared.transform_groups); - let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups); - narrow_avx - .chain_with(narrow_id) - .build_and_add_to(&mut shared.transform_groups); - widen.build_and_add_to(&mut shared.transform_groups); -} - -fn define_simd( - shared: &mut SharedDefinitions, - x86_instructions: &InstructionGroup, - narrow: &mut TransformGroupBuilder, - narrow_avx: &mut TransformGroupBuilder, -) { - let insts = &shared.instructions; - let band = insts.by_name("band"); - let band_not = insts.by_name("band_not"); - let bitcast = insts.by_name("bitcast"); - let bitselect = insts.by_name("bitselect"); - let bor = insts.by_name("bor"); - let bnot = insts.by_name("bnot"); - let bxor = insts.by_name("bxor"); - let extractlane = insts.by_name("extractlane"); - let fabs = insts.by_name("fabs"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - let fmin = insts.by_name("fmin"); - let fneg = insts.by_name("fneg"); - let iadd_imm = insts.by_name("iadd_imm"); - let icmp = insts.by_name("icmp"); - let imax = insts.by_name("imax"); - let imin = insts.by_name("imin"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let insertlane = insts.by_name("insertlane"); - let ishl = insts.by_name("ishl"); - let ishl_imm = insts.by_name("ishl_imm"); - let raw_bitcast = insts.by_name("raw_bitcast"); - let scalar_to_vector = insts.by_name("scalar_to_vector"); - let splat = insts.by_name("splat"); - let shuffle = insts.by_name("shuffle"); - let sshr = insts.by_name("sshr"); - let swizzle = insts.by_name("swizzle"); - let trueif = insts.by_name("trueif"); - let uadd_sat = insts.by_name("uadd_sat"); - let umax = insts.by_name("umax"); - let umin = insts.by_name("umin"); - let snarrow = insts.by_name("snarrow"); - let swiden_high = insts.by_name("swiden_high"); - let swiden_low = insts.by_name("swiden_low"); - let ushr_imm = insts.by_name("ushr_imm"); - let ushr = insts.by_name("ushr"); - let uwiden_high = 
insts.by_name("uwiden_high"); - let uwiden_low = insts.by_name("uwiden_low"); - let vconst = insts.by_name("vconst"); - let vall_true = insts.by_name("vall_true"); - let vany_true = insts.by_name("vany_true"); - let vselect = insts.by_name("vselect"); - - let x86_palignr = x86_instructions.by_name("x86_palignr"); - let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); - let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); - let x86_pmins = x86_instructions.by_name("x86_pmins"); - let x86_pminu = x86_instructions.by_name("x86_pminu"); - let x86_pshufb = x86_instructions.by_name("x86_pshufb"); - let x86_pshufd = x86_instructions.by_name("x86_pshufd"); - let x86_psra = x86_instructions.by_name("x86_psra"); - let x86_ptest = x86_instructions.by_name("x86_ptest"); - let x86_punpckh = x86_instructions.by_name("x86_punpckh"); - let x86_punpckl = x86_instructions.by_name("x86_punpckl"); - - let imm = &shared.imm; - - // Set up variables and immediates. - let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); - let uimm8_one = Literal::constant(&imm.uimm8, 0x01); - let uimm8_eight = Literal::constant(&imm.uimm8, 8); - let u128_zeroes = constant(vec![0x00; 16]); - let u128_ones = constant(vec![0xff; 16]); - let u128_seventies = constant(vec![0x70; 16]); - let a = var("a"); - let b = var("b"); - let c = var("c"); - let d = var("d"); - let e = var("e"); - let f = var("f"); - let g = var("g"); - let h = var("h"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - // Limit the SIMD vector size: eventually multiple vector sizes may be supported - // but for now only SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // SIMD splat: 8-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { - let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any8x16(x)), - vec![ - // Move into the lowest 8 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Zero out a different XMM register; the shuffle mask for moving the lowest byte - // to all other byte lanes is 0x0. - def!(b = vconst(u128_zeroes)), - // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b). - def!(y = x86_pshufb(a, b)), - ], - ); - } - - // SIMD splat: 16-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let splat_x16x8 = splat.bind(vector(ty, sse_vector_size)); - let raw_bitcast_any16x8_to_i32x4 = raw_bitcast - .bind(vector(I32, sse_vector_size)) - .bind(vector(ty, sse_vector_size)); - let raw_bitcast_i32x4_to_any16x8 = raw_bitcast - .bind(vector(ty, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - narrow.legalize( - def!(y = splat_x16x8(x)), - vec![ - // Move into the lowest 16 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Insert the value again but in the next lowest 16 bits. - def!(b = insertlane(a, x, uimm8_one)), - // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. - def!(c = raw_bitcast_any16x8_to_i32x4(b)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(d = x86_pshufd(c, uimm8_zero)), - // No instruction emitted; pretend this is an X16x8 again. 
- def!(y = raw_bitcast_i32x4_to_any16x8(d)), - ], - ); - } - - // SIMD splat: 32-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let splat_any32x4 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any32x4(x)), - vec![ - // Translate to an x86 MOV to get the value in an XMM register. - def!(a = scalar_to_vector(x)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(y = x86_pshufd(a, uimm8_zero)), - ], - ); - } - - // SIMD splat: 64-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { - let splat_any64x2 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any64x2(x)), - vec![ - // Move into the lowest 64 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Move into the highest 64 bits of the same XMM register. - def!(y = insertlane(a, x, uimm8_one)), - ], - ); - } - - // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring - // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion, - // see https://github.com/WebAssembly/simd/issues/93. - { - let swizzle = swizzle.bind(vector(I8, sse_vector_size)); - narrow.legalize( - def!(a = swizzle(x, y)), - vec![ - def!(b = vconst(u128_seventies)), - def!(c = uadd_sat(y, b)), - def!(a = x86_pshufb(x, c)), - ], - ); - } - - // SIMD bnot - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bnot = bnot.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = bnot(x)), - vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))], - ); - } - - // SIMD shift right (arithmetic, i16x8 and i32x4) - for ty in &[I16, I32] { - let sshr = sshr.bind(vector(*ty, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - narrow.legalize( - def!(a = sshr(x, y)), - vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))], - ); - } - // SIMD shift right (arithmetic, i8x16) - { - let sshr = sshr.bind(vector(I8, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size)); - let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size)); - narrow.legalize( - def!(z = sshr(x, y)), - vec![ - // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits. - def!(a = iadd_imm(y, uimm8_eight)), - def!(b = bitcast_i64x2(a)), - // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(c = x86_punpckl(x, x)), - def!(d = raw_bitcast_i16x8(c)), - def!(e = x86_psra(d, b)), - // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(f = x86_punpckh(x, x)), - def!(g = raw_bitcast_i16x8_again(f)), - def!(h = x86_psra(g, b)), - // Re-pack the vector. - def!(z = snarrow(e, h)), - ], - ); - } - // SIMD shift right (arithmetic, i64x2) - { - let sshr_vector = sshr.bind(vector(I64, sse_vector_size)); - let sshr_scalar_lane0 = sshr.bind(I64); - let sshr_scalar_lane1 = sshr.bind(I64); - narrow.legalize( - def!(z = sshr_vector(x, y)), - vec![ - // Use scalar operations to shift the first lane. - def!(a = extractlane(x, uimm8_zero)), - def!(b = sshr_scalar_lane0(a, y)), - def!(c = insertlane(x, b, uimm8_zero)), - // Do the same for the second lane. 
- def!(d = extractlane(x, uimm8_one)), - def!(e = sshr_scalar_lane1(d, y)), - def!(z = insertlane(c, e, uimm8_one)), - ], - ); - } - - // SIMD select - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c - narrow.legalize( - def!(d = bitselect(c, x, y)), - vec![ - def!(a = band(x, c)), - def!(b = band_not(y, c)), - def!(d = bor(a, b)), - ], - ); - } - - // SIMD vselect; replace with bitselect if BLEND* instructions are not available. - // This works, because each lane of boolean vector is filled with zeroes or ones. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vselect = vselect.bind(vector(ty, sse_vector_size)); - let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(d = vselect(c, x, y)), - vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))], - ); - } - - // SIMD vany_true - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vany_true = vany_true.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = vany_true(x)), - vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], - ); - } - - // SIMD vall_true - let eq = Literal::enumerator_for(&imm.intcc, "eq"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vall_true = vall_true.bind(vector(ty, sse_vector_size)); - if ty.is_int() { - // In the common case (Wasm's integer-only all_true), we do not require a - // bitcast. - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(c = icmp(eq, x, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } else { - // However, to support other types we must bitcast them to an integer vector to - // use icmp. 
- let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); - let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(b = raw_bitcast_to_int(x)), - def!(c = icmp(eq, b, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } - } - - // SIMD icmp ne - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { - let icmp_ = icmp.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ne, a, b)), - vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], - ); - } - - // SIMD icmp greater-/less-than - let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); - let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); - let sge = Literal::enumerator_for(&imm.intcc, "sge"); - let uge = Literal::enumerator_for(&imm.intcc, "uge"); - let slt = Literal::enumerator_for(&imm.intcc, "slt"); - let ult = Literal::enumerator_for(&imm.intcc, "ult"); - let sle = Literal::enumerator_for(&imm.intcc, "sle"); - let ule = Literal::enumerator_for(&imm.intcc, "ule"); - for ty in &[I8, I16, I32] { - // greater-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ugt, a, b)), - vec![ - def!(x = x86_pmaxu(a, b)), - def!(y = icmp(eq, x, b)), - def!(c = bnot(y)), - ], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(sge, a, b)), - vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(uge, a, b)), - vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], - ); - - // less-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); - } - - // SIMD integer min/max - for ty in &[I8, I16, I32] { - let imin = imin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); - let umin = umin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); - let imax = imax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); - let umax = umax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]); - } - - // SIMD fcmp greater-/less-than - let gt = Literal::enumerator_for(&imm.floatcc, "gt"); - let lt = Literal::enumerator_for(&imm.floatcc, "lt"); - let ge = Literal::enumerator_for(&imm.floatcc, "ge"); - let le = Literal::enumerator_for(&imm.floatcc, "le"); - let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); - let ult = Literal::enumerator_for(&imm.floatcc, "ult"); - let uge = Literal::enumerator_for(&imm.floatcc, "uge"); - let ule = Literal::enumerator_for(&imm.floatcc, "ule"); - for ty in &[F32, F64] { - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = 
fcmp(lt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); - } - - for ty in &[F32, F64] { - let fneg = fneg.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fneg(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = bxor(a, e)), // Flip the MSB. - ], - ); - } - - // SIMD fabs - for ty in &[F32, F64] { - let fabs = fabs.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fabs(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = band(a, e)), // Unset the MSB. - ], - ); - } - - // SIMD widen - for ty in &[I8, I16] { - let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = swiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = swiden_low(c)), - ], - ); - let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = uwiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = uwiden_low(c)), - ], - ); - } - - narrow.custom_legalize(shuffle, "convert_shuffle"); - narrow.custom_legalize(extractlane, "convert_extractlane"); - narrow.custom_legalize(insertlane, "convert_insertlane"); - narrow.custom_legalize(ineg, "convert_ineg"); - narrow.custom_legalize(ushr, "convert_ushr"); - narrow.custom_legalize(ishl, "convert_ishl"); - narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector"); - narrow.custom_legalize(fmin, "expand_minmax_vector"); - narrow.custom_legalize(fmax, "expand_minmax_vector"); - - narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); - narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); - narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector"); -} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 26c833a77f..7c3e4c6877 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,87 +1,25 @@ -use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::types::{ReferenceType, VectorType}; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegsBuilder; -use crate::shared::types::Bool::B1; -use crate::shared::types::Float::{F32, F64}; -use 
crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; use crate::shared::Definitions as SharedDefinitions; -mod encodings; -mod instructions; -mod legalize; -mod opcodes; -mod recipes; -mod registers; pub(crate) mod settings; pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); - let regs = registers::define(); - let inst_group = instructions::define( - &mut shared_defs.all_instructions, - &shared_defs.formats, - &shared_defs.imm, - &shared_defs.entities, - ); - legalize::define(shared_defs, &inst_group); + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); - // CPU modes for 32-bit and 64-bit operations. - let mut x86_64 = CpuMode::new("I64"); - let mut x86_32 = CpuMode::new("I32"); - - let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); - let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); - let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); - let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx"); - let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); - - x86_32.legalize_monomorphic(expand_flags); - x86_32.legalize_default(x86_narrow); - x86_32.legalize_type(B1, expand_flags); - x86_32.legalize_type(I8, x86_widen); - x86_32.legalize_type(I16, x86_widen); - x86_32.legalize_type(I32, x86_expand); - x86_32.legalize_value_type(ReferenceType(R32), x86_expand); - x86_32.legalize_type(F32, x86_expand); - x86_32.legalize_type(F64, x86_expand); - x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - x86_64.legalize_monomorphic(expand_flags); - x86_64.legalize_default(x86_narrow); - x86_64.legalize_type(B1, expand_flags); - x86_64.legalize_type(I8, x86_widen); - x86_64.legalize_type(I16, x86_widen); - x86_64.legalize_type(I32, x86_expand); - x86_64.legalize_type(I64, x86_expand); - x86_64.legalize_value_type(ReferenceType(R64), x86_expand); - x86_64.legalize_type(F32, x86_expand); - x86_64.legalize_type(F64, x86_expand); - x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - let recipes = recipes::define(shared_defs, &settings, ®s); - - let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); - x86_32.set_encodings(encodings.enc32); - x86_64.set_encodings(encodings.enc64); - let encodings_predicates = encodings.inst_pred_reg.extract(); - - let recipes = encodings.recipes; - - let cpu_modes = vec![x86_64, x86_32]; + let cpu_modes = vec![]; TargetIsa::new( "x86", settings, - regs, - recipes, + IsaRegsBuilder::new().build(), + Recipes::new(), cpu_modes, - encodings_predicates, + InstructionPredicateMap::new(), ) } diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs deleted file mode 100644 index 2e72a1744d..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ /dev/null @@ -1,721 +0,0 @@ -//! Static, named definitions of instruction opcodes. - -/// Empty opcode for use as a default. -pub static EMPTY: [u8; 0] = []; - -/// Add with carry flag r{16,32,64} to r/m of the same size. 
-pub static ADC: [u8; 1] = [0x11]; - -/// Add r{16,32,64} to r/m of the same size. -pub static ADD: [u8; 1] = [0x01]; - -/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. -pub static ADD_IMM: [u8; 1] = [0x81]; - -/// Add sign-extended imm8 to r/m{16,32,64}. -pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE2). -pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; - -/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE). -pub static ADDPS: [u8; 2] = [0x0f, 0x58]; - -/// Add the low double-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. -pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; - -/// Add the low single-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. -pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; - -/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). -pub static AND: [u8; 1] = [0x21]; - -/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. -pub static AND_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} AND sign-extended imm8. -pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical AND NOT of packed single-precision floating-point -/// values in xmm1 and xmm2/mem. -pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; - -/// Return the bitwise logical AND of packed single-precision floating-point values -/// in xmm1 and xmm2/mem. -pub static ANDPS: [u8; 2] = [0x0f, 0x54]; - -/// Bit scan forward (stores index of first encountered 1 from the front). -pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; - -/// Bit scan reverse (stores index of first encountered 1 from the back). -pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; - -/// Select packed single-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14]; - -/// Select packed double-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15]; - -/// Call near, relative, displacement relative to next instruction (sign-extended). -pub static CALL_RELATIVE: [u8; 1] = [0xe8]; - -/// Move r/m{16,32,64} if overflow (OF=1). -pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; - -/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). -pub static CMP_IMM: [u8; 1] = [0x81]; - -/// Compare imm8 with r/m{16,32,64}. -pub static CMP_IMM8: [u8; 1] = [0x83]; - -/// Compare r{16,32,64} with r/m of the same size. -pub static CMP_REG: [u8; 1] = [0x39]; - -/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE2). -pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; - -/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE). -pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; - -/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision -/// floating-point values in xmm1 (SSE2). -pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b]; - -/// Convert scalar double-precision floating-point value to scalar single-precision -/// floating-point value. 
-pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; - -/// Convert doubleword integer to scalar double-precision floating-point value. -pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; - -/// Convert doubleword integer to scalar single-precision floating-point value. -pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; - -/// Convert scalar single-precision floating-point value to scalar double-precision -/// float-point value. -pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; - -/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed -/// doubleword values in xmm1 using truncation (SSE2). -pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b]; - -/// Convert with truncation scalar double-precision floating-point value to signed -/// integer. -pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; - -/// Convert with truncation scalar single-precision floating-point value to integer. -pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; - -/// Unsigned divide for {16,32,64}-bit. -pub static DIV: [u8; 1] = [0xf7]; - -/// Divide packed double-precision floating-point values in xmm1 by packed double-precision -/// floating-point values in xmm2/mem (SSE2). -pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; - -/// Divide packed single-precision floating-point values in xmm1 by packed single-precision -/// floating-point values in xmm2/mem (SSE). -pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; - -/// Divide low double-precision floating-point value in xmm1 by low double-precision -/// floating-point value in xmm2/m64. -pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; - -/// Divide low single-precision floating-point value in xmm1 by low single-precision -/// floating-point value in xmm2/m32. -pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; - -/// Signed divide for {16,32,64}-bit. -pub static IDIV: [u8; 1] = [0xf7]; - -/// Signed multiply for {16,32,64}-bit, generic registers. -pub static IMUL: [u8; 2] = [0x0f, 0xaf]; - -/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. -pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; - -/// Insert scalar single-precision floating-point value. -pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; - -/// Either: -/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. -/// 2. Jump far, absolute indirect, address given in m16:64. -pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; - -/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. -pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; - -/// Jump near (rel32) if overflow (OF=1). -pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; - -/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. -pub static JUMP_SHORT: [u8; 1] = [0xeb]; - -/// Jump short (rel8) if equal (ZF=1). -pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; - -/// Jump short (rel8) if not equal (ZF=0). -pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; - -/// Jump short (rel8) if overflow (OF=1). -pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; - -/// Store effective address for m in register r{16,32,64}. -pub static LEA: [u8; 1] = [0x8d]; - -/// Count the number of leading zero bits. -pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; - -/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; - -/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). 
-pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; - -/// Return the maximum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; - -/// Return the maximum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; - -/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; - -/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). -pub static MINPS: [u8; 2] = [0x0f, 0x5d]; - -/// Return the minimum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; - -/// Return the minimum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; - -/// Move r8 to r/m8. -pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; - -/// Move imm{16,32,64} to same-sized register. -pub static MOV_IMM: [u8; 1] = [0xb8]; - -/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. -pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; - -/// Move {r/m16, r/m32, r/m64} to same-sized register. -pub static MOV_LOAD: [u8; 1] = [0x8b]; - -/// Move r16 to r/m16. -pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; - -/// Move {r16, r32, r64} to same-sized register or memory. -pub static MOV_STORE: [u8; 1] = [0x89]; - -/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). -pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; - -/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. -pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; - -/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. -pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; - -/// Move packed single-precision floating-point values low to high (SSE). -pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; - -/// Move scalar double-precision floating-point value (from reg/mem to reg). -pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; - -/// Move scalar double-precision floating-point value (from reg to reg/mem). -pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point value (from reg to reg/mem). -pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point-value (from reg/mem to reg). -pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; - -/// Move byte to register with sign-extension. -pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; - -/// Move word to register with sign-extension. -pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; - -/// Move doubleword to register with sign-extension. -pub static MOVSXD: [u8; 1] = [0x63]; - -/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). -pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; - -/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). -pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; - -/// Move byte to register with zero-extension. -pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; - -/// Move word to register with zero-extension. -pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; - -/// Unsigned multiply for {16,32,64}-bit. -pub static MUL: [u8; 1] = [0xf7]; - -/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE2). 
-pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; - -/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE). -pub static MULPS: [u8; 2] = [0x0f, 0x59]; - -/// Multiply the low double-precision floating-point value in xmm2/m64 by the -/// low double-precision floating-point value in xmm1. -pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; - -/// Multiply the low single-precision floating-point value in xmm2/m32 by the -/// low single-precision floating-point value in xmm1. -pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; - -/// Reverse each bit of r/m{16,32,64}. -pub static NOT: [u8; 1] = [0xf7]; - -/// r{16,32,64} OR register of same size. -pub static OR: [u8; 1] = [0x09]; - -/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. -pub static OR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} OR sign-extended imm8. -pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). -pub static ORPS: [u8; 2] = [0x0f, 0x56]; - -/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). -pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; - -/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; - -/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte -/// integers in xmm1 using signed saturation (SSE2). -pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed -/// word integers in xmm1 using signed saturation (SSE2). -pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte -/// integers in xmm1 using unsigned saturation (SSE2). -pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed -/// word integers in xmm1 using unsigned saturation (SSE4.1). -pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b]; - -/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; - -/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; - -/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; - -/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; - -/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; - -/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; - -/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; - -/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). 
-pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; - -/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is -/// shifted to the right by the constant number of bytes in imm8 (SSSE3). -pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; - -/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). -pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; - -/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). -pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; - -/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; - -/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; - -/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte -/// in XMM0 and store the values into xmm1 (SSE4.1). -pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; - -/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 -/// (SSE4.1). -pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; - -/// Compare packed data for equal (SSE4.1). -pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; - -/// Compare packed signed byte integers for greater than (SSE2). -pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; - -/// Compare packed signed doubleword integers for greater than (SSE2). -pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; - -/// Compare packed signed quadword integers for greater than (SSE4.2). -pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; - -/// Compare packed signed word integers for greater than (SSE2). -pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; - -/// Extract doubleword or quadword, depending on REX.W (SSE4.1). -pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; - -/// Extract byte (SSE4.1). -pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; - -/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. -pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; - -/// Insert doubleword or quadword, depending on REX.W (SSE4.1). -pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; - -/// Insert byte (SSE4.1). -pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; - -/// Insert word (SSE2). -pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). -pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). 
-pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). -pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). -pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; - -/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; - -/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; - -/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; - -/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; - -/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; - -/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; - -/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of -/// the results in xmm1 (SSE2). -pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; - -/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32 -/// bits of each product in xmm1 (SSE4.1). -pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64 -/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding. -pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers -/// in xmm2/m128, and store the quadword results in xmm1 (SSE2). -pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4]; - -/// Multiply the packed word integers, add adjacent doubleword results. -pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5]; - -/// Pop top of stack into r{16,32,64}; increment stack pointer. -pub static POP_REG: [u8; 1] = [0x58]; - -/// Returns the count of number of bits set to 1. 
-pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; - -/// Bitwise OR of xmm2/m128 and xmm1 (SSE2). -pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb]; - -/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3). -pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; - -/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and -/// store the result in xmm1 (SSE2). -pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; - -/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71]; - -/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72]; - -/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73]; - -/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1]; - -/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2]; - -/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2]; - -/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2]; - -/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). -pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; - -/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). -pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; - -/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2). -pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; - -/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). -pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; - -/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8]; - -/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9]; - -/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; - -/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; - -/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all -/// 0s (SSE4.1). 
-pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; - -/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68]; - -/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69]; - -/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A]; - -/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D]; - -/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60]; - -/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61]; - -/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62]; - -/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C]; - -/// Push r{16,32,64}. -pub static PUSH_REG: [u8; 1] = [0x50]; - -/// Logical exclusive OR (SSE2). -pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef]; - -/// Near return to calling procedure. -pub static RET_NEAR: [u8; 1] = [0xc3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_CL: [u8; 1] = [0xd3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_IMM8: [u8; 1] = [0xc1]; - -/// Round scalar doubl-precision floating-point values. -pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b]; - -/// Round scalar single-precision floating-point values. -pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a]; - -/// Subtract with borrow r{16,32,64} from r/m of the same size. -pub static SBB: [u8; 1] = [0x19]; - -/// Set byte if overflow (OF=1). -pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE2). -pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE). -pub static SQRTPS: [u8; 2] = [0x0f, 0x51]; - -/// Compute square root of scalar double-precision floating-point value. -pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51]; - -/// Compute square root of scalar single-precision value. -pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51]; - -/// Subtract r{16,32,64} from r/m of same size. -pub static SUB: [u8; 1] = [0x29]; - -/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE2). -pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c]; - -/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE). -pub static SUBPS: [u8; 2] = [0x0f, 0x5c]; - -/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 -/// and store the result in xmm1. -pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c]; - -/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 -/// and store the result in xmm1. -pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c]; - -/// AND r8 with r/m8; set SF, ZF, PF according to result. 
-pub static TEST_BYTE_REG: [u8; 1] = [0x84]; - -/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result. -pub static TEST_REG: [u8; 1] = [0x85]; - -/// Count the number of trailing zero bits. -pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc]; - -/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64 -/// and set the EFLAGS flags accordingly. -pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e]; - -/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32 -/// and set the EFLAGS flags accordingly. -pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; - -/// Raise invalid opcode instruction. -pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; - -/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed -/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior -/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode -/// (AVX512VL, AVX512F). -pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; - -/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. -pub static XOR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} XOR sign-extended imm8. -pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// r/m{16,32,64} XOR register of the same size. -pub static XOR: [u8; 1] = [0x31]; - -/// Bitwise logical XOR of packed double-precision floating-point values. -pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57]; - -/// Bitwise logical XOR of packed single-precision floating-point values. -pub static XORPS: [u8; 2] = [0x0f, 0x57]; diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs deleted file mode 100644 index f45f8dc673..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ /dev/null @@ -1,3445 +0,0 @@ -//! Encoding recipes for x86/x86_64. -use std::rc::Rc; - -use cranelift_codegen_shared::isa::x86::EncodingBits; - -use crate::cdsl::ast::Literal; -use crate::cdsl::formats::InstructionFormat; -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::recipes::{ - EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack, -}; -use crate::cdsl::regs::IsaRegs; -use crate::cdsl::settings::SettingGroup; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes; - -/// Helper data structure to create recipes and template recipes. -/// It contains all the recipes and recipe templates that might be used in the encodings crate of -/// this same directory. -pub(crate) struct RecipeGroup<'builder> { - /// Memoized registers description, to pass it to builders later. - regs: &'builder IsaRegs, - - /// All the recipes explicitly created in this file. This is different from the final set of - /// recipes, which is definitive only once encodings have generated new recipes on the fly. - recipes: Vec, - - /// All the recipe templates created in this file. 
- templates: Vec>>, -} - -impl<'builder> RecipeGroup<'builder> { - fn new(regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipes: Vec::new(), - templates: Vec::new(), - } - } - fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) { - self.recipes.push(recipe.build()); - } - fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc> { - let template = Rc::new(Template::new(recipe, self.regs)); - self.templates.push(template.clone()); - template - } - fn add_template_inferred( - &mut self, - recipe: EncodingRecipeBuilder, - infer_function: &'static str, - ) -> Rc> { - let template = - Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function)); - self.templates.push(template.clone()); - template - } - fn add_template(&mut self, template: Template<'builder>) -> Rc> { - let template = Rc::new(template); - self.templates.push(template.clone()); - template - } - pub fn recipe(&self, name: &str) -> &EncodingRecipe { - self.recipes - .iter() - .find(|recipe| recipe.name == name) - .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name)) - } - pub fn template(&self, name: &str) -> &Template { - self.templates - .iter() - .find(|recipe| recipe.name() == name) - .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) - } -} - -// Opcode representation. -// -// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are -// variable length, so we use separate recipes for different styles of opcodes and prefixes. The -// opcode format is indicated by the recipe name prefix. -// -// The match case below does not include the REX prefix which goes after the mandatory prefix. -// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are -// represented by separate recipes. -// -// The encoding bits are: -// -// 0-7: The opcode byte . -// 8-9: pp, mandatory prefix: -// 00 none (Op*) -// 01 66 (Mp*) -// 10 F3 (Mp*) -// 11 F2 (Mp*) -// 10-11: mm, opcode map: -// 00 (Op1/Mp1) -// 01 0F (Op2/Mp2) -// 10 0F 38 (Op3/Mp3) -// 11 0F 3A (Op3/Mp3) -// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -// 15: REX.W bit (or VEX.W/E) -// -// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and -// the pp+mm format is ready for supporting VEX prefixes. -// -// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this -// could be simplified. - -/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits. -fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { - let enc = EncodingBits::new(op_bytes, rrr, w); - (enc.prefix().recipe_name_prefix(), enc.bits()) -} - -/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the -/// corresponding `put_*` function from the `binemit.rs` module. -fn replace_put_op(code: Option, prefix: &str) -> Option { - code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) -} - -/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
-fn replace_nonrex_constraints( - regs: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - let new_rc_index = if rc_index == regs.class_by_name("GPR") { - regs.class_by_name("GPR8") - } else if rc_index == regs.class_by_name("FPR") { - regs.class_by_name("FPR8") - } else { - rc_index - }; - OperandConstraint::RegClass(new_rc_index) - } - _ => constraint, - }) - .collect() -} - -fn replace_evex_constraints( - _: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in - // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the - // rc_index conversion to FPR32. In the meantime, this is effectively a no-op - // conversion--the register class stays the same. - OperandConstraint::RegClass(rc_index) - } - _ => constraint, - }) - .collect() -} - -/// Specifies how the prefix (e.g. REX) is emitted by a Recipe. -#[derive(Copy, Clone, PartialEq)] -pub enum RecipePrefixKind { - /// The REX emission behavior is not hardcoded for the Recipe - /// and may be overridden when using the Template. - Unspecified, - - /// The Recipe must hardcode the non-emission of the REX prefix. - NeverEmitRex, - - /// The Recipe must hardcode the emission of the REX prefix. - AlwaysEmitRex, - - /// The Recipe should infer the emission of the REX.RXB bits from registers, - /// and the REX.W bit from the EncodingBits. - /// - /// Because such a Recipe has a non-constant instruction size, it must have - /// a special `compute_size` handler for the inferrable-REX case. - InferRex, - - /// The Recipe must hardcode the emission of an EVEX prefix. - Evex, -} - -impl Default for RecipePrefixKind { - fn default() -> Self { - Self::Unspecified - } -} - -/// Previously called a TailRecipe in the Python meta language, this allows to create multiple -/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different -/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating -/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be -/// reconsidered later. -#[derive(Clone)] -pub(crate) struct Template<'builder> { - /// Description of registers, used in the build() method. - regs: &'builder IsaRegs, - - /// The recipe template, which is to be specialized (by copy). - recipe: EncodingRecipeBuilder, - - /// How is the REX prefix emitted? - rex_kind: RecipePrefixKind, - - /// Function for `compute_size()` when REX is inferrable. - inferred_rex_compute_size: Option<&'static str>, - - /// Other recipe to use when REX-prefixed. - when_prefixed: Option>>, - - // Parameters passed in the EncodingBits. - /// Value of the W bit (0 or 1), stored in the EncodingBits. - w_bit: u16, - /// Value of the RRR bits (between 0 and 0b111). - rrr_bits: u16, - /// Opcode bytes. 
- op_bytes: &'static [u8], -} - -impl<'builder> Template<'builder> { - fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipe, - rex_kind: RecipePrefixKind::default(), - inferred_rex_compute_size: None, - when_prefixed: None, - w_bit: 0, - rrr_bits: 0, - op_bytes: &opcodes::EMPTY, - } - } - - fn name(&self) -> &str { - &self.recipe.name - } - fn rex_kind(self, kind: RecipePrefixKind) -> Self { - Self { - rex_kind: kind, - ..self - } - } - fn inferred_rex_compute_size(self, function: &'static str) -> Self { - Self { - inferred_rex_compute_size: Some(function), - ..self - } - } - fn when_prefixed(self, template: Rc>) -> Self { - assert!(self.when_prefixed.is_none()); - Self { - when_prefixed: Some(template), - ..self - } - } - - // Copy setters. - pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { - assert!(!op_bytes.is_empty()); - let mut copy = self.clone(); - copy.op_bytes = op_bytes; - copy - } - pub fn w(&self) -> Self { - let mut copy = self.clone(); - copy.w_bit = 1; - copy - } - pub fn rrr(&self, value: u16) -> Self { - assert!(value <= 0b111); - let mut copy = self.clone(); - copy.rrr_bits = value; - copy - } - pub fn nonrex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::AlwaysEmitRex, - "Template requires REX prefix." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::NeverEmitRex; - copy - } - pub fn rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." - ); - if let Some(prefixed) = &self.when_prefixed { - let mut ret = prefixed.rex(); - // Forward specialized parameters. - ret.op_bytes = self.op_bytes; - ret.w_bit = self.w_bit; - ret.rrr_bits = self.rrr_bits; - return ret; - } - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::AlwaysEmitRex; - copy - } - pub fn infer_rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." - ); - assert!( - self.when_prefixed.is_none(), - "infer_rex used with when_prefixed()." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::InferRex; - copy - } - - pub fn build(mut self) -> (EncodingRecipe, u16) { - let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); - - let (recipe_name, size_addendum) = match self.rex_kind { - RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => { - // Ensure the operands are limited to non-REX constraints. - let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = - Some(replace_nonrex_constraints(self.regs, operands_out)); - - (opcode.into(), self.op_bytes.len() as u64) - } - RecipePrefixKind::AlwaysEmitRex => { - ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1) - } - RecipePrefixKind::InferRex => { - assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead."); - // Hook up the right function for inferred compute_size(). 
- assert!( - self.inferred_rex_compute_size.is_some(), - "InferRex recipe '{}' needs an inferred_rex_compute_size function.", - &self.recipe.name - ); - self.recipe.compute_size = self.inferred_rex_compute_size; - - ("DynRex".to_string() + opcode, self.op_bytes.len() as u64) - } - RecipePrefixKind::Evex => { - // Allow the operands to expand limits to EVEX constraints. - let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out)); - - ("Evex".to_string() + opcode, 4 + 1) - } - }; - - self.recipe.base_size += size_addendum; - - // Branch ranges are relative to the end of the instruction. - // For InferRex, the range should be the minimum, assuming no REX. - if let Some(range) = self.recipe.branch_range.as_mut() { - range.inst_size += size_addendum; - } - - self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); - self.recipe.name = recipe_name + &self.recipe.name; - - (self.recipe.build(), bits) - } -} - -/// Returns a predicate checking that the "cond" field of the instruction contains one of the -/// directly supported floating point condition codes. -fn supported_floatccs_predicate( - supported_cc: &[Literal], - format: &InstructionFormat, -) -> InstructionPredicate { - supported_cc - .iter() - .fold(InstructionPredicate::new(), |pred, literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "cond", - literal.to_rust_code(), - )) - }) -} - -/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. -fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { - ["1", "2", "4", "8"] - .iter() - .fold(InstructionPredicate::new(), |pred, &literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "imm", - literal.into(), - )) - }) -} - -pub(crate) fn define<'shared>( - shared_defs: &'shared SharedDefinitions, - settings: &'shared SettingGroup, - regs: &'shared IsaRegs, -) -> RecipeGroup<'shared> { - // The set of floating point condition codes that are directly supported. - // Other condition codes need to be reversed or expressed as two tests. - let floatcc = &shared_defs.imm.floatcc; - let supported_floatccs: Vec = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - - // Register classes shorthands. - let abcd = regs.class_by_name("ABCD"); - let gpr = regs.class_by_name("GPR"); - let fpr = regs.class_by_name("FPR"); - let flag = regs.class_by_name("FLAG"); - - // Operand constraints shorthands. - let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); - let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); - let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); - let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); - let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15")); - let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0")); - - // Stack operand with a 32-bit signed displacement from either RBP or RSP. - let stack_gpr32 = Stack::new(gpr); - let stack_fpr32 = Stack::new(fpr); - - let formats = &shared_defs.formats; - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Definitions. 
- let mut recipes = RecipeGroup::new(regs); - - // A null unary instruction that takes a GPR register. Can be used for identity copies and - // no-op conversions. - recipes.add_recipe( - EncodingRecipeBuilder::new("null", &formats.unary, 0) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![stack_gpr32]) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0) - .operands_out(vec![reg_r15]) - .emit(""), - ); - // umr with a fixed register output that's r15. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let r15 = RU::r15.into(); - {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); - modrm_rr(r15, in_reg0, sink); - "#, - ), - ); - - // No-op fills, created by late-stage redundant-fill removal. - recipes.add_recipe( - EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"), - ); - - // XX opcode, no ModR/M. - recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( - r#" - sink.trap(code, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - "#, - )); - - // Macro: conditional jump over a ud2. - recipes.add_recipe( - EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. - sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_cond_trap, - )) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. - sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - // XX /r - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rr", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with operands swapped. (RM form). - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rrx", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with input operands swapped. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fax", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![1]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - // The operand order does not matter for calculating whether a REX prefix is needed. - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with a byte immediate. - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - // XX /n for a unary operation with extension bits. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("ur", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // XX /r, but for a unary operator with separate input/output register, like - // copies. MR form, preserving flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("umr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Same as umr, but with FPR -> GPR registers. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - ); - - // Same as umr, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, src), sink); - modrm_rr(out_reg0, src, sink); - "#, - ), - ); - - // XX /r, but for a unary operator with separate input/output register. - // RM form. Clobbers FLAGS. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm, but doesn't clobber FLAGS. 
- let urm_noflags = recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm_noflags, but input limited to ABCD. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) - .operands_in(vec![abcd]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .when_prefixed(urm_noflags), - ); - - // XX /r, RM form, FPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("furm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // Same as furm, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, out_reg0), sink); - modrm_rr(src, out_reg0, sink); - "#, - ), - ); - - // XX /r, RM form, GPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("frurm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // XX /r, RM form, FPR -> GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .isa_predicate(use_sse41) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - sink.put1(match opcode { - Opcode::Nearest => 0b00, - Opcode::Floor => 0b01, - Opcode::Ceil => 0b10, - Opcode::Trunc => 0b11, - x => panic!("{} unexpected for furmi_rnd", opcode), - }); - "#, - ), - ); - - // XX /r, for regmove instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - // XX /r, for regmove instructions (FPR version, RM encoded). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, dst), sink); - modrm_rr(src, dst, sink); - "#, - ), - ); - - // XX /n with one arg in %rcx, for shifts. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("rc", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rcx), - ]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("div", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![reg_rax, reg_rdx]) - .emit( - r#" - sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg2), sink); - modrm_r_bits(in_reg2, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), - ); - - // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("mulx", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg1), sink); - modrm_r_bits(in_reg1, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), - ); - - // XX /r for BLEND* instructions - recipes.add_template_inferred( - EncodingRecipeBuilder::new("blend", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_xmm0), - OperandConstraint::RegClass(fpr), - OperandConstraint::RegClass(fpr), - ]) - .operands_out(vec![2]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - "size_with_inferred_rex_for_inreg1_inreg2", - ); - - // XX /n ib with 8-bit immediate sign-extended. - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - // XX /n id with 32-bit immediate sign-extended. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. 
for pshufd) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, "imm", 8, 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), "size_with_inferred_rex_for_inreg0_outreg0" - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - { - // XX /n id with 32-bit immediate sign-extended. UnaryImm version. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - // XX+rd id unary with 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - - // XX+rd id unary with bool immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: u32 = if imm { 1 } else { 0 }; - sink.put4(imm); - "#, - ), - ); - - // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq unary with 64-bit immediate. 
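The "XX+rd" recipes (`pu_id` above, `pu_iq` below, and the `fnaddr*`, `pushq` and `popq` recipes that follow) skip the ModR/M byte entirely and fold the register into the opcode, which is what the `bits | (out_reg0 & 7)` expressions do. A sketch of that fold, with an illustrative helper:

    // Sketch only: "+rd" encodings put the low three bits of the register number
    // in the opcode byte itself and carry the fourth bit in REX.B, so no ModR/M
    // byte is emitted.
    fn plus_rd(opcode: u8, reg_enc: u8) -> (u8, bool) {
        (opcode | (reg_enc & 7), reg_enc & 8 != 0) // (patched opcode, REX.B needed)
    }
    // Example: `mov r10, imm64` is 0xB8 + (10 & 7) = 0xBA with REX.W and REX.B set.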
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put8(imm as u64); - "#, - ), - ); - - // XX+rd id unary with zero immediate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /n Unary with floating point 32-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_32bit_float( - &*formats.unary_ieee32, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - // XX /n Unary with floating point 64-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( - &*formats.unary_ieee64, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pushq", &formats.unary, 0) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("popq", &formats.nullary, 0) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - "#, - ), - ); - - // XX /r, for regmove instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) - .operands_in(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); - modrm_rr(RU::rsp.into(), in_reg0, sink); - "#, - ), - ); - - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - recipes.add_recipe( - EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0) - .operands_out(vec![Stack::new(gpr)]) - .emit(""), - ); - - // XX+rd id with Abs4 function relocation. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 function relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put8(0); - "#, - ), - ); - - // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put4(!0); - "#, - ), - ); - - // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put8(!0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd id with Abs4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.global_values[global_value].symbol_name(), - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. 
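The `-4` addend passed by the PC-relative recipes above (`pcrel_fnaddr8`, `got_fnaddr8`) is plain arithmetic rather than anything backend-specific; a worked sketch under the usual `S + A - P` relocation convention:

    // Sketch only: the CPU resolves a rel32 field against the end of the
    // instruction, but the relocation is recorded at the start of the 4-byte
    // immediate, so the addend has to bridge that gap.
    fn pcrel4_addend(imm_start: i64, inst_end: i64) -> i64 {
        imm_start - inst_end // evaluates to -4 when the immediate is the final four bytes
    }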
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.global_values[global_value].symbol_name(), - 0); - sink.put8(0); - "#, - ), - ); - - // XX+rd iq with PCRel4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // Stack addresses. - // - // TODO Alternative forms for 8-bit immediates, when applicable. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6) - .operands_out(vec![gpr]) - .emit( - r#" - let sp = StackRef::sp(stack_slot, &func.stack_slots); - let base = stk_base(sp.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - let imm : i32 = offset.into(); - sink.put4(sp.offset.checked_add(imm).unwrap() as u32); - "#, - ), - ); - - // Constant addresses. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - ); - - // Store recipes. - - { - // Simple stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into()); - - // XX /r register-indirect store with no offset. - let st = recipes.add_template_recipe( - EncodingRecipeBuilder::new("st", &formats.store, 1) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. 
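The `_abcd` store variants below exist because of an x86 byte-register quirk rather than anything recipe-specific; a sketch of the underlying check, with a hypothetical helper name:

    // Sketch only: without a REX prefix, 8-bit register encodings 4..=7 name
    // AH/CH/DH/BH, so only AL/CL/DL/BL (encodings 0..=3, the ABCD class) address
    // the low byte of a GPR. Any REX prefix remaps 4..=7 to SPL/BPL/SIL/DIL and
    // unlocks r8b..r15b, which is what the `when_prefixed` REX templates rely on.
    fn low_byte_addressable_without_rex(gpr_enc: u8) -> bool {
        gpr_enc < 4
    }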
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("st_abcd", &formats.store, 1) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - regs, - ) - .when_prefixed(st), - ); - - // XX /r register-indirect store of FPR with no offset. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fst", &formats.store, 1) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0); - - // XX /r register-indirect store with 8-bit offset. - let st_disp8 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - regs, - ) - .when_prefixed(st_disp8), - ); - - // XX /r register-indirect store with 8-bit offset of FPR. 
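The `needs_sib_byte`/`needs_offset` branches in the store (and later load) emit code handle two ModR/M corner cases; a condensed sketch of the same two checks, with illustrative names:

    // Sketch only: with mod=00, r/m=0b100 does not name a base register but means
    // "a SIB byte follows", so rsp/r12 bases need an explicit SIB; r/m=0b101 means
    // RIP-relative disp32, so rbp/r13 bases must fall back to a zero 8-bit
    // displacement instead of the offset-free form.
    fn base_requires_sib(base_enc: u8) -> bool {
        base_enc & 7 == 0b100 // rsp, r12
    }
    fn base_requires_zero_disp8(base_enc: u8) -> bool {
        base_enc & 7 == 0b101 // rbp, r13
    }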
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - // XX /r register-indirect store with 32-bit offset. - let st_disp32 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) - .operands_in(vec![gpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) - .operands_in(vec![abcd, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - regs, - ) - .when_prefixed(st_disp32), - ); - - // XX /r register-indirect store with 32-bit offset of FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) - .operands_in(vec![fpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - } - - { - // Complex stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); - - // XX /r register-indirect store with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. 
- if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); - - // XX /r register-indirect store with index and 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset of FPR. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); - - // XX /r register-indirect store with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary spill with SIB and 32-bit displacement. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) - .operands_in(vec![gpr]) - .operands_out(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Like spillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) - .operands_in(vec![fpr]) - .operands_out(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Regspill using RSP-relative addressing. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Like regspill32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Load recipes. - - { - // Simple loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); - - // XX /r load with no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with no offset. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); - - // XX /r load with 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. 
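The `size_plus_maybe_sib_or_offset_*` calculators referenced by these load and store recipes appear to pre-compute how many of those optional bytes a given base register forces; a sketch, with an illustrative name:

    // Sketch only: an offset-free register-indirect access costs the recipe's base
    // size, plus one byte if the base forces a SIB byte (rsp/r12) or one byte if
    // it forces a zero disp8 (rbp/r13); the two cases are mutually exclusive.
    fn no_offset_access_size(base_size: u8, base_enc: u8) -> u8 {
        base_size + u8::from(base_enc & 7 == 0b100) + u8::from(base_enc & 7 == 0b101)
    }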
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); - - // XX /r load with 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with 32-bit offset. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - } - - { - // Complex loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); - - // XX /r load with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with index and no offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); - - // XX /r load with index and 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); - - // XX /r load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary fill with SIB and 32-bit displacement. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Like fillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) - .operands_in(vec![stack_fpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Regfill with RSP-relative 32-bit displacement. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Like regfill32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Call/return. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ret", &formats.multiary, 0) - .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), - ); - - // Branches. 
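The branch recipes below come in a one-byte (`jmpb`, `brib`, `brfb`) and a four-byte (`jmpd`, `brid`, `brfd`) displacement form, and which one is legal is just a range check on the signed offset. A sketch, with an illustrative helper:

    // Sketch only: a short-form branch is usable when the target's signed distance
    // from the end of the branch instruction fits in one byte; anything farther
    // needs the rel32 form emitted by disp4.
    fn fits_rel8(branch_end: i64, target: i64) -> bool {
        let disp = target - branch_end;
        i64::from(i8::MIN) <= disp && disp <= i64::from(i8::MAX)
    }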
- - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .inst_predicate(valid_scale(&*formats.branch_table_entry)) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); - if needs_offset(in_reg1) { - modrm_sib_disp8(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - } - "#, - ), - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) - 
.operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - - // No reloc is needed here as the jump table is emitted directly after - // the function body. - jt_disp4(table, func, sink); - "#, - ), - ); - - // Test flags and set a register. - // - // These setCC instructions only set the low 8 bits, and they can only write ABCD registers - // without a REX prefix. - // - // Other instruction encodings accepting `b1` inputs have the same constraints and only look at - // the low 8 bits of the input register. - - let seti = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(seti), - ); - - let setf = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(setf), - ); - - // Conditional move (a.k.a integer select) - // (maybe-REX.W) 0F 4x modrm(r,r) - // 1 byte, modrm(r,r), is after the opcode - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("cmov", &formats.int_select, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rflags), - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![2]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"), - ); - - // Bit scan forwards and reverse - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Arithematic with flag I/O. - - // XX /r, MR form. Add two GPR registers and set carry flag. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rout", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers and get carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rin", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![0]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers with carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rio", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Compare and set flags. - - // XX /r, MR form. Compare two GPR registers and set flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Same as rcmp, but second operand is the stack pointer. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); - modrm_rr(in_reg0, RU::rsp.into(), sink); - "#, - ), - ); - - // XX /r, RM form. Compare two FPR registers and set flags. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0); - - // XX /n, MI form with imm8. 
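The `*_ib`/`*_id` recipe pairs (`r_ib`/`r_id` and `adjustsp_ib`/`adjustsp_id` earlier, `rcmp_ib`/`rcmp_id` below) are split by the same predicate: whether the immediate survives sign-extension from 8 bits. A sketch of that test:

    // Sketch only: the one-byte immediate form applies exactly when the value
    // round-trips through i8, because the hardware sign-extends imm8 to the full
    // operand width; everything else falls back to the imm32 form.
    fn fits_imm8(imm: i64) -> bool {
        i64::from(imm as i8) == imm
    }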
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_small_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0); - - // XX /n, MI form with imm32. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_big_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Test-and-branch. - // - // This recipe represents the macro fusion of a test and a conditional branch. - // This serves two purposes: - // - // 1. Guarantee that the test and branch get scheduled next to each other so - // macro fusion is guaranteed to be possible. - // 2. Hide the status flags from Cranelift which doesn't currently model flags. - // - // The encoding bits affect both the test and the branch instruction: - // - // Bits 0-7 are the Jcc opcode. - // Bits 8-15 control the test instruction which always has opcode byte 0x85. - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // 8-bit test-and-branch. - - let t8jccb = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) - .operands_in(vec![abcd]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. 
-                    sink.put1(bits as u8);
-                    disp1(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(t8jccb),
-    );
-
-    let t8jccd = recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
-                .operands_in(vec![gpr])
-                .branch_range((7, 32))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
-                .operands_in(vec![abcd])
-                .branch_range((7, 32))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(t8jccd),
-    );
-
-    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
-    // The register allocator can't handle a branch instruction with constrained
-    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
-    // any register, but it is larger because it uses a 32-bit test instruction with
-    // a 0xff immediate.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
-            .operands_in(vec![gpr])
-            .branch_range((11, 32))
-            .emit(
-                r#"
-                    // test32 r, 0xff.
-                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                    sink.put4(0xff);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-            ),
-    );
-
-    // Comparison that produces a `b1` result in a GPR.
-    //
-    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
-    //
-    // TODO This is not a great solution because:
-    //
-    // - The cmp+setcc combination is not recognized by the CPU's macro fusion.
-    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
-    //   instructions may need a REX independently.
-    // - Modeling CPU flags in the type system would be better.
-    //
-    // Since the `setCC` instructions only write an 8-bit register, we use that as
-    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
-    // bits are known to be 0 or 1. The high bits are undefined.
-    //
-    // This bandaid macro doesn't support a REX prefix for the final `setCC`
-    // instruction, so it is limited to the `ABCD` register class for booleans.
-    // The omission of a `when_prefixed` alternative is deliberate here.
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![abcd])
-                .emit(
-                    r#"
-                    // Comparison instruction.
-                    {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                    modrm_rr(in_reg0, in_reg1, sink);
-                    // `setCC` instruction, no REX.
-                    let setcc = 0x90 | icc2opc(cond);
-                    sink.put1(0x0f);
-                    sink.put1(setcc as u8);
-                    modrm_rr(out_reg0, 0, sink);
-                "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                    // Comparison instruction.
- {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let is_small_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_small_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let is_big_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_big_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Make a FloatCompare instruction predicate with the supported condition codes. - // - // Same thing for floating point. - // - // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: - // - // ZPC OSA - // UN 111 000 - // GT 000 000 - // LT 001 000 - // EQ 100 000 - // - // Not all floating point condition codes are supported. - // The omission of a `when_prefixed` alternative is deliberate here. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![abcd]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // `setCC` instruction, no REX. 
- use crate::ir::condcodes::FloatCC::*; - let setcc = match cond { - Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) - Unordered => 0x9a, // UN => setp (P=1) - OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), - UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) - GreaterThan => 0x97, // GT => seta (C=0&Z=0) - GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) - UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) - UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported by fcscc", cond), - }; - sink.put1(0x0f); - sink.put1(setcc); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - { - let supported_floatccs: Vec = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - recipes.add_template_inferred( - EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs[..], - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // Add immediate byte indicating what type of comparison. - use crate::ir::condcodes::FloatCC::*; - let imm = match cond { - Equal => 0x00, - LessThan => 0x01, - LessThanOrEqual => 0x02, - Unordered => 0x03, - NotEqual => 0x04, - UnorderedOrGreaterThanOrEqual => 0x05, - UnorderedOrGreaterThan => 0x06, - Ordered => 0x07, - _ => panic!("{} not supported by pfcmp", cond), - }; - sink.put1(imm); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Test instruction. - {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Check ZF = 1 flag to see if register holds 0. - sink.put1(0x0f); - sink.put1(0x94); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.put1(0xff); - // `setCC` instruction, no REX. - use crate::ir::condcodes::IntCC::*; - let setcc = 0x90 | icc2opc(Equal); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( - r#" - sink.add_stack_map(args, func, isa); - "#, - ), - ); - - // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. - // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. - - recipes.add_recipe( - EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // Those data16 prefixes are necessary to pad to 16 bytes. 
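                    // [Editorial note, not in the original] For reference, the 16 bytes this recipe
                    // emits form the standard x86-64 ELF General Dynamic TLS sequence:
                    //
                    //     66 48 8d 3d 00 00 00 00    data16 lea gv@tlsgd(%rip), %rdi
                    //     66 66 48 e8 00 00 00 00    data16 data16 rex.w callq __tls_get_addr
                    //
                    // with the two zeroed 32-bit fields patched by the relocations recorded below.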
- - // data16 lea gv@tlsgd(%rip),%rdi - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - const LEA: u8 = 0x8d; - sink.put1(LEA); // lea - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::ElfX86_64TlsGd, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // data16 data16 callq __tls_get_addr-4 - sink.put1(0x66); // data16 - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - sink.put1(0xe8); // call - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &ExternalName::LibCall(LibCall::ElfTlsGetAddr), - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // movq gv@tlv(%rip), %rdi - sink.put1(0x48); // rex - sink.put1(0x8b); // mov - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::MachOX86_64Tlv, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // callq *(%rdi) - sink.put1(0xff); - sink.put1(0x17); - "#, - ), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r) - // this maps to: out_reg0, in_reg0, in_reg1 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), rm (op2, r) - // this maps to: out_reg0, in_reg0 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes -} diff --git a/cranelift/codegen/meta/src/isa/x86/registers.rs b/cranelift/codegen/meta/src/isa/x86/registers.rs deleted file mode 100644 index 85a8965f89..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/registers.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; - -pub(crate) fn define() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("FloatRegs", "xmm") - .units(16) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("IntRegs", "r") - .units(16) - .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) - .track_pressure(true) - .pinned_reg(15); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["rflags"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - let gpr = regs.add_class(builder); - - let builder = 
RegClassBuilder::new_toplevel("FPR", float_regs); - let fpr = regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); - let gpr8 = regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); - regs.add_class(builder); - - regs.build() -} diff --git a/cranelift/codegen/shared/src/isa/mod.rs b/cranelift/codegen/shared/src/isa/mod.rs deleted file mode 100644 index 4d8e485f6c..0000000000 --- a/cranelift/codegen/shared/src/isa/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Shared ISA-specific definitions. - -pub mod x86; diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs deleted file mode 100644 index 9edb2a6e6f..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs +++ /dev/null @@ -1,419 +0,0 @@ -//! Provides a named interface to the `u16` Encoding bits. - -use std::ops::RangeInclusive; - -/// Named interface to the `u16` Encoding bits, representing an opcode. -/// -/// Cranelift requires each recipe to have a single encoding size in bytes. -/// X86 opcodes are variable length, so we use separate recipes for different -/// styles of opcodes and prefixes. The opcode format is indicated by the -/// recipe name prefix. -/// -/// VEX/XOP and EVEX prefixes are not yet supported. -/// Encodings using any of these prefixes are represented by separate recipes. -/// -/// The encoding bits are: -/// -/// 0-7: The opcode byte . -/// 8-9: pp, mandatory prefix: -/// 00: none (Op*) -/// 01: 66 (Mp*) -/// 10: F3 (Mp*) -/// 11: F2 (Mp*) -/// 10-11: mm, opcode map: -/// 00: (Op1/Mp1) -/// 01: 0F (Op2/Mp2) -/// 10: 0F 38 (Op3/Mp3) -/// 11: 0F 3A (Op3/Mp3) -/// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -/// 15: REX.W bit (or VEX.W/E) -#[derive(Copy, Clone, PartialEq)] -pub struct EncodingBits(u16); -const OPCODE: RangeInclusive = 0..=7; -const OPCODE_PREFIX: RangeInclusive = 8..=11; // Includes pp and mm. -const RRR: RangeInclusive = 12..=14; -const REX_W: RangeInclusive = 15..=15; - -impl From for EncodingBits { - fn from(bits: u16) -> Self { - Self(bits) - } -} - -impl EncodingBits { - /// Constructs a new EncodingBits from parts. - pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self { - assert!( - !op_bytes.is_empty(), - "op_bytes must include at least one opcode byte" - ); - let mut new = Self::from(0); - let last_byte = op_bytes[op_bytes.len() - 1]; - new.write(OPCODE, last_byte as u16); - let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into(); - new.write(OPCODE_PREFIX, prefix as u16); - new.write(RRR, rrr); - new.write(REX_W, rex_w); - new - } - - /// Returns a copy of the EncodingBits with the RRR bits set. - #[inline] - pub fn with_rrr(mut self, rrr: u8) -> Self { - debug_assert_eq!(self.rrr(), 0); - self.write(RRR, rrr.into()); - self - } - - /// Returns a copy of the EncodingBits with the REX.W bit set. - #[inline] - pub fn with_rex_w(mut self) -> Self { - debug_assert_eq!(self.rex_w(), 0); - self.write(REX_W, 1); - self - } - - /// Returns the raw bits. - #[inline] - pub fn bits(self) -> u16 { - self.0 - } - - /// Convenience method for writing bits to specific range. 
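    ///
    /// [Editorial example, not in the original source] For instance, writing the prefix
    /// nibble for `66 0F` opcodes (`0b0101`, i.e. pp = 01 for 66 and mm = 01 for 0F) into
    /// bits 8..=11 of a value whose low byte is the opcode `0x58` packs the two-byte
    /// instruction `66 0F 58` (e.g. ADDPD):
    ///
    ///     let mut bits = EncodingBits::from(0x0058);
    ///     bits.write(OPCODE_PREFIX, 0b0101);
    ///     assert_eq!(bits.bits(), 0x0558);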
- #[inline] - fn write(&mut self, range: RangeInclusive, value: u16) { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. - let mask = (1 << size) - 1; // Generate a bit mask. - debug_assert!( - value <= mask, - "The written value should have fewer than {} bits.", - size - ); - let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask. - self.0 &= mask_complement; // Clear the bits in `range`. - let value = (value & mask) << *range.start(); // Place the value in the correct location. - self.0 |= value; // Modify the bits in `range`. - } - - /// Convenience method for reading bits from a specific range. - #[inline] - fn read(self, range: RangeInclusive) -> u8 { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. - debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits"); - let mask = (1 << size) - 1; // Generate a bit mask. - ((self.0 >> *range.start()) & mask) as u8 - } - - /// Instruction opcode byte, without the prefix. - #[inline] - pub fn opcode_byte(self) -> u8 { - self.read(OPCODE) - } - - /// Prefix kind for the instruction, as an enum. - #[inline] - pub fn prefix(self) -> OpcodePrefix { - OpcodePrefix::from(self.read(OPCODE_PREFIX)) - } - - /// Extracts the PP bits of the OpcodePrefix. - #[inline] - pub fn pp(self) -> u8 { - self.prefix().to_primitive() & 0x3 - } - - /// Extracts the MM bits of the OpcodePrefix. - #[inline] - pub fn mm(self) -> u8 { - (self.prefix().to_primitive() >> 2) & 0x3 - } - - /// Bits for the ModR/M byte for certain opcodes. - #[inline] - pub fn rrr(self) -> u8 { - self.read(RRR) - } - - /// REX.W bit (or VEX.W/E). - #[inline] - pub fn rex_w(self) -> u8 { - self.read(REX_W) - } -} - -/// Opcode prefix representation. -/// -/// The prefix type occupies four of the EncodingBits. -#[allow(non_camel_case_types)] -#[allow(missing_docs)] -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum OpcodePrefix { - Op1, - Mp1_66, - Mp1_f3, - Mp1_f2, - Op2_0f, - Mp2_66_0f, - Mp2_f3_0f, - Mp2_f2_0f, - Op3_0f_38, - Mp3_66_0f_38, - Mp3_f3_0f_38, - Mp3_f2_0f_38, - Op3_0f_3a, - Mp3_66_0f_3a, - Mp3_f3_0f_3a, - Mp3_f2_0f_3a, -} - -impl From for OpcodePrefix { - fn from(n: u8) -> Self { - use OpcodePrefix::*; - match n { - 0b0000 => Op1, - 0b0001 => Mp1_66, - 0b0010 => Mp1_f3, - 0b0011 => Mp1_f2, - 0b0100 => Op2_0f, - 0b0101 => Mp2_66_0f, - 0b0110 => Mp2_f3_0f, - 0b0111 => Mp2_f2_0f, - 0b1000 => Op3_0f_38, - 0b1001 => Mp3_66_0f_38, - 0b1010 => Mp3_f3_0f_38, - 0b1011 => Mp3_f2_0f_38, - 0b1100 => Op3_0f_3a, - 0b1101 => Mp3_66_0f_3a, - 0b1110 => Mp3_f3_0f_3a, - 0b1111 => Mp3_f2_0f_3a, - _ => panic!("invalid opcode prefix"), - } - } -} - -impl Into for OpcodePrefix { - fn into(self) -> u8 { - use OpcodePrefix::*; - match self { - Op1 => 0b0000, - Mp1_66 => 0b0001, - Mp1_f3 => 0b0010, - Mp1_f2 => 0b0011, - Op2_0f => 0b0100, - Mp2_66_0f => 0b0101, - Mp2_f3_0f => 0b0110, - Mp2_f2_0f => 0b0111, - Op3_0f_38 => 0b1000, - Mp3_66_0f_38 => 0b1001, - Mp3_f3_0f_38 => 0b1010, - Mp3_f2_0f_38 => 0b1011, - Op3_0f_3a => 0b1100, - Mp3_66_0f_3a => 0b1101, - Mp3_f3_0f_3a => 0b1110, - Mp3_f2_0f_3a => 0b1111, - } - } -} - -impl OpcodePrefix { - /// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into`. - fn to_primitive(self) -> u8 { - self.into() - } - - /// Extracts the OpcodePrefix from the opcode. 
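    ///
    /// [Editorial example, not in the original source] The prefix is everything except the
    /// final opcode byte, so a three-byte `66 0F 38` opcode such as `66 0F 38 17` (PTEST)
    /// maps to the corresponding `Mp3` variant:
    ///
    ///     assert_eq!(OpcodePrefix::from_opcode(&[0x66, 0x0f, 0x38, 0x17]),
    ///                OpcodePrefix::Mp3_66_0f_38);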
- pub fn from_opcode(op_bytes: &[u8]) -> Self { - assert!(!op_bytes.is_empty(), "at least one opcode byte"); - - let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; - match prefix_bytes { - [] => Self::Op1, - [0x66] => Self::Mp1_66, - [0xf3] => Self::Mp1_f3, - [0xf2] => Self::Mp1_f2, - [0x0f] => Self::Op2_0f, - [0x66, 0x0f] => Self::Mp2_66_0f, - [0xf3, 0x0f] => Self::Mp2_f3_0f, - [0xf2, 0x0f] => Self::Mp2_f2_0f, - [0x0f, 0x38] => Self::Op3_0f_38, - [0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38, - [0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38, - [0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38, - [0x0f, 0x3a] => Self::Op3_0f_3a, - [0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a, - [0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a, - [0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a, - _ => { - panic!("unexpected opcode sequence: {:?}", op_bytes); - } - } - } - - /// Returns the recipe name prefix. - /// - /// At the moment, each similar OpcodePrefix group is given its own Recipe. - /// In order to distinguish them, this string is prefixed. - pub fn recipe_name_prefix(self) -> &'static str { - use OpcodePrefix::*; - match self { - Op1 => "Op1", - Op2_0f => "Op2", - Op3_0f_38 | Op3_0f_3a => "Op3", - Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1", - Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2", - Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3", - Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3", - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Helper function for prefix_roundtrip() to avoid long lines. - fn test_roundtrip(p: OpcodePrefix) { - assert_eq!(p, OpcodePrefix::from(p.to_primitive())); - } - - /// Tests that to/from each opcode matches. - #[test] - fn prefix_roundtrip() { - test_roundtrip(OpcodePrefix::Op1); - test_roundtrip(OpcodePrefix::Mp1_66); - test_roundtrip(OpcodePrefix::Mp1_f3); - test_roundtrip(OpcodePrefix::Mp1_f2); - test_roundtrip(OpcodePrefix::Op2_0f); - test_roundtrip(OpcodePrefix::Mp2_66_0f); - test_roundtrip(OpcodePrefix::Mp2_f3_0f); - test_roundtrip(OpcodePrefix::Mp2_f2_0f); - test_roundtrip(OpcodePrefix::Op3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_66_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_38); - test_roundtrip(OpcodePrefix::Op3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_66_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a); - } - - #[test] - fn prefix_to_name() { - assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1"); - assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2"); - assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3"); - assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1"); - assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2"); - assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3"); - } - - /// Tests that the opcode_byte is the lower of the EncodingBits. - #[test] - fn encodingbits_opcode_byte() { - let enc = EncodingBits::from(0x00ff); - assert_eq!(enc.opcode_byte(), 0xff); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - - let enc = EncodingBits::from(0x00cd); - assert_eq!(enc.opcode_byte(), 0xcd); - } - - /// Tests that the OpcodePrefix is encoded correctly. 
- #[test] - fn encodingbits_prefix() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0xc); - assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the PP bits are encoded correctly. - #[test] - fn encodingbits_pp() { - let enc = EncodingBits::from(0x0300); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x3); - assert_eq!(enc.mm(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the MM bits are encoded correctly. - #[test] - fn encodingbits_mm() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x00); - assert_eq!(enc.mm(), 0x3); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the ModR/M bits are encoded correctly. - #[test] - fn encodingbits_rrr() { - let enc = EncodingBits::from(0x5000); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x5); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the REX.W bit is encoded correctly. - #[test] - fn encodingbits_rex_w() { - let enc = EncodingBits::from(0x8000); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x1); - } - - /// Tests setting and unsetting a bit using EncodingBits::write. - #[test] - fn encodingbits_flip() { - let mut bits = EncodingBits::from(0); - let range = 2..=2; - - bits.write(range.clone(), 1); - assert_eq!(bits.bits(), 0b100); - - bits.write(range, 0); - assert_eq!(bits.bits(), 0b000); - } - - /// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness). - #[test] - fn encodingbits_roundtrip() { - let bits: u16 = 0x1234; - assert_eq!(EncodingBits::from(bits).bits(), bits); - } - - #[test] - // I purposely want to divide the bits using the ranges defined above. - #[allow(clippy::inconsistent_digit_grouping)] - fn encodingbits_construction() { - assert_eq!( - EncodingBits::new(&[0x66, 0x40], 5, 1).bits(), - 0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode - ); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_write_to_invalid_range() { - EncodingBits::from(0).write(1..=0, 42); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_read_to_invalid_range() { - EncodingBits::from(0).read(1..=0); - } -} diff --git a/cranelift/codegen/shared/src/isa/x86/mod.rs b/cranelift/codegen/shared/src/isa/x86/mod.rs deleted file mode 100644 index fb45ae56c3..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Shared x86-specific definitions. - -mod encoding_bits; -pub use encoding_bits::*; diff --git a/cranelift/codegen/shared/src/lib.rs b/cranelift/codegen/shared/src/lib.rs index 9b4cb941ed..c031ee7440 100644 --- a/cranelift/codegen/shared/src/lib.rs +++ b/cranelift/codegen/shared/src/lib.rs @@ -22,7 +22,6 @@ pub mod condcodes; pub mod constant_hash; pub mod constants; -pub mod isa; /// Version number of this crate. 
pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 6a4e18cbe3..18004b5c03 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3565,45 +3565,6 @@ pub(crate) fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - Opcode::DummySargT => unreachable!(), Opcode::Iabs => { diff --git a/cranelift/codegen/src/isa/legacy/mod.rs b/cranelift/codegen/src/isa/legacy/mod.rs index a89230f941..15900b9509 100644 --- a/cranelift/codegen/src/isa/legacy/mod.rs +++ b/cranelift/codegen/src/isa/legacy/mod.rs @@ -1,12 +1,4 @@ //! Legacy ("old-style") backends that will be removed in the future. -// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both -// included whenever building with x86 support. The new backend is the default, -// but the old can be requested with `BackendVariant::Legacy`. However, if this -// crate is built with the `old-x86-backend` feature, then the old backend is -// default instead. -#[cfg(feature = "x86")] -pub(crate) mod x86; - #[cfg(feature = "riscv")] pub(crate) mod riscv; diff --git a/cranelift/codegen/src/isa/legacy/x86/abi.rs b/cranelift/codegen/src/isa/legacy/x86/abi.rs deleted file mode 100644 index 934cfec4dd..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/abi.rs +++ /dev/null @@ -1,1102 +0,0 @@ -//! x86 ABI implementation. - -use super::super::super::settings as shared_settings; -use super::registers::{FPR, GPR, RU}; -use super::settings as isa_settings; -use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; -use crate::cursor::{Cursor, CursorPosition, EncCursor}; -use crate::ir; -use crate::ir::immediates::Imm64; -use crate::ir::stackslot::{StackOffset, StackSize}; -use crate::ir::types; -use crate::ir::{ - get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, - ValueLoc, -}; -use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa}; -use crate::regalloc::RegisterSet; -use crate::result::CodegenResult; -use crate::stack_layout::layout_stack; -use alloc::borrow::Cow; -use core::i32; -use target_lexicon::{PointerWidth, Triple}; - -/// Argument registers for x86-64 -static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9]; - -/// Return value registers. 
-static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx]; - -/// Argument registers for x86-64, when using windows fastcall -static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9]; - -/// Return value registers for x86-64, when using windows fastcall -static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax]; - -/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used -/// by the callee for temporary values. -/// -/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow -/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually -/// used by the callee to save & restore the values of the arguments. -/// -/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling -/// convention reserves spill space for parameters, you don’t have to use them as such" -const WIN_SHADOW_STACK_SPACE: StackSize = 32; - -/// Stack alignment requirement for functions. -/// -/// 16 bytes is the perfect stack alignment, because: -/// -/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which -/// are aligned to 16 bytes in order to aid performance". -/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a -/// 16-byte aligned stack pointer. -/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located -/// higher up in the stack. -const STACK_ALIGNMENT: u32 = 16; - -#[derive(Clone)] -struct Args { - pointer_bytes: u8, - pointer_bits: u8, - pointer_type: ir::Type, - gpr: &'static [RU], - gpr_used: usize, - fpr_limit: usize, - fpr_used: usize, - offset: u32, - call_conv: CallConv, - shared_flags: shared_settings::Flags, - #[allow(dead_code)] - isa_flags: isa_settings::Flags, - assigning_returns: bool, -} - -impl Args { - fn new( - bits: u8, - gpr: &'static [RU], - fpr_limit: usize, - call_conv: CallConv, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, - assigning_returns: bool, - ) -> Self { - let offset = if call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - Self { - pointer_bytes: bits / 8, - pointer_bits: bits, - pointer_type: ir::Type::int(u16::from(bits)).unwrap(), - gpr, - gpr_used: 0, - fpr_limit, - fpr_used: 0, - offset, - call_conv, - shared_flags: shared_flags.clone(), - isa_flags: isa_flags.clone(), - assigning_returns, - } - } -} - -impl ArgAssigner for Args { - fn assign(&mut self, arg: &AbiParam) -> ArgAction { - if let ArgumentPurpose::StructArgument(size) = arg.purpose { - if self.call_conv != CallConv::SystemV { - panic!( - "The sarg argument purpose is not yet implemented for non-systemv call conv {:?}", - self.call_conv, - ); - } - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += size; - debug_assert!(self.offset <= i32::MAX as u32); - return ArgAction::AssignAndChangeType(loc, types::SARG_T); - } - - let ty = arg.value_type; - - if ty.bits() > u16::from(self.pointer_bits) { - if !self.assigning_returns && self.call_conv.extends_windows_fastcall() { - // "Any argument that doesn't fit in 8 bytes, or isn't - // 1, 2, 4, or 8 bytes, must be passed by reference" - return ValueConversion::Pointer(self.pointer_type).into(); - } else if !ty.is_vector() && !ty.is_float() { - // On SystemV large integers and booleans are broken down to fit in a register. 
- return ValueConversion::IntSplit.into(); - } - } - - // Vectors should stay in vector registers unless SIMD is not enabled--then they are split - if ty.is_vector() { - if self.shared_flags.enable_simd() { - let reg = FPR.unit(self.fpr_used); - self.fpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - return ValueConversion::VectorSplit.into(); - } - - // Small integers are extended to the size of a pointer register, but - // only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI - // does, but our other supported ABIs on x86 do not. - if ty.is_int() - && ty.bits() < u16::from(self.pointer_bits) - && self.call_conv.extends_baldrdash() - { - match arg.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), - ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), - } - } - - // Handle special-purpose arguments. - if ty.is_int() && self.call_conv.extends_baldrdash() { - match arg.purpose { - // This is SpiderMonkey's `WasmTlsReg`. - ArgumentPurpose::VMContext => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r14 - } else { - RU::rsi - } as RegUnit) - .into(); - } - // This is SpiderMonkey's `WasmTableCallSigReg`. - ArgumentPurpose::SignatureId => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r10 - } else { - RU::rcx - } as RegUnit) - .into() - } - _ => {} - } - } - - // Try to use a GPR. - if !ty.is_float() && self.gpr_used < self.gpr.len() { - let reg = self.gpr[self.gpr_used] as RegUnit; - self.gpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Try to use an FPR. - let fpr_offset = if self.call_conv.extends_windows_fastcall() { - // Float and general registers on windows share the same parameter index. - // The used register depends entirely on the parameter index: Even if XMM0 - // is not used for the first parameter, it cannot be used for the second parameter. - debug_assert_eq!(self.fpr_limit, self.gpr.len()); - &mut self.gpr_used - } else { - &mut self.fpr_used - }; - - if ty.is_float() && *fpr_offset < self.fpr_limit { - let reg = FPR.unit(*fpr_offset); - *fpr_offset += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Assign a stack location. - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += u32::from(self.pointer_bytes); - debug_assert!(self.offset <= i32::MAX as u32); - loc.into() - } -} - -/// Legalize `sig`. 
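///
/// [Editorial sketch, not in the original source] A rough illustration of the argument
/// assigners this function builds below: under the 64-bit SystemV configuration
/// (`Args::new(64, &ARG_GPRS[..], 8, ...)`, flags elided), a signature `(i64, f32, i64)`
/// has its parameters assigned to `%rdi`, `%xmm0` and `%rsi` in that order, and anything
/// that no longer fits in a register falls through to `ArgumentLoc::Stack` slots at
/// successive pointer-sized offsets.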
-pub fn legalize_signature( - sig: &mut Cow, - triple: &Triple, - _current: bool, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, -) { - let bits; - let mut args; - - match triple.pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => { - bits = 32; - args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false); - } - PointerWidth::U64 => { - bits = 64; - args = if sig.call_conv.extends_windows_fastcall() { - Args::new( - bits, - &ARG_GPRS_WIN_FASTCALL_X64[..], - 4, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - } else { - Args::new( - bits, - &ARG_GPRS[..], - 8, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - }; - } - } - - let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() { - // windows-x64 calling convention only uses XMM0 or RAX for return values - (&RET_GPRS_WIN_FASTCALL_X64[..], 1) - } else { - (&RET_GPRS[..], 2) - }; - - let mut rets = Args::new( - bits, - ret_regs, - ret_fpr_limit, - sig.call_conv, - shared_flags, - isa_flags, - true, - ); - - // If we don't have enough available return registers - // to fit all of the return values, we need to backtrack and start - // assigning locations all over again with a different strategy. In order to - // do that, we need a copy of the original assigner for the returns. - let mut backup_rets = rets.clone(); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - if new_returns - .iter() - .filter(|r| r.purpose == ArgumentPurpose::Normal) - .any(|r| !r.location.is_reg()) - { - // The return values couldn't all fit into available return - // registers. Introduce the use of a struct-return parameter. - debug_assert!(!sig.uses_struct_return_param()); - - // We're using the first register for the return pointer parameter. - let mut ret_ptr_param = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match args.assign(&ret_ptr_param) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_param.location = ArgumentLoc::Reg(reg); - sig.to_mut().params.push(ret_ptr_param); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - // We're using the first return register for the return pointer (like - // sys v does). - let mut ret_ptr_return = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match backup_rets.assign(&ret_ptr_return) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_return.location = ArgumentLoc::Reg(reg); - sig.to_mut().returns.push(ret_ptr_return); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - sig.to_mut().returns.retain(|ret| { - // Either this is the return pointer, in which case we want to keep - // it, or else assume that it is assigned for a reason and doesn't - // conflict with our return pointering legalization. 
- debug_assert_eq!( - ret.location.is_assigned(), - ret.purpose != ArgumentPurpose::Normal - ); - ret.location.is_assigned() - }); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) { - sig.to_mut().returns = new_returns; - } - } else { - sig.to_mut().returns = new_returns; - } - } - - if let Some(new_params) = legalize_args(&sig.params, &mut args) { - sig.to_mut().params = new_params; - } -} - -/// Get register class for a type appearing in a legalized signature. -pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass { - if ty.is_int() || ty.is_bool() || ty.is_ref() { - GPR - } else { - FPR - } -} - -/// Get the set of allocatable registers for `func`. -pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet { - let mut regs = RegisterSet::new(); - regs.take(GPR, RU::rsp as RegUnit); - regs.take(GPR, RU::rbp as RegUnit); - - // 32-bit arch only has 8 registers. - if triple.pointer_width().unwrap() != PointerWidth::U64 { - for i in 8..16 { - regs.take(GPR, GPR.unit(i)); - regs.take(FPR, FPR.unit(i)); - } - if flags.enable_pinned_reg() { - unimplemented!("Pinned register not implemented on x86-32."); - } - } else { - // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and - // isn't the fixed output register of any instruction. - if flags.enable_pinned_reg() { - regs.take(GPR, RU::r15 as RegUnit); - } - } - - regs -} - -/// Get the set of callee-saved general-purpose registers. -fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are - // considered nonvolatile and must be saved and restored by a function that uses - // them." - // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention - // RSP & RBP are not listed below, since they are restored automatically during - // a function call. If that wasn't the case, function calls (RET) would not work. - &[ - RU::rbx, - RU::rdi, - RU::rsi, - RU::r12, - RU::r13, - RU::r14, - RU::r15, - ] - } else { - &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15] - } - } - } -} - -/// Get the set of callee-saved floating-point (SIMD) registers. -fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved - // and restored by a function that uses them." - // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of - // February 5th, 2020. - &[ - RU::xmm6, - RU::xmm7, - RU::xmm8, - RU::xmm9, - RU::xmm10, - RU::xmm11, - RU::xmm12, - RU::xmm13, - RU::xmm14, - RU::xmm15, - ] - } else { - &[] - } - } - } -} - -/// Get the set of callee-saved registers that are used. 
-fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet { - let mut all_callee_saved = RegisterSet::empty(); - for reg in callee_saved_gprs(isa, func.signature.call_conv) { - all_callee_saved.free(GPR, *reg as RegUnit); - } - for reg in callee_saved_fprs(isa, func.signature.call_conv) { - all_callee_saved.free(FPR, *reg as RegUnit); - } - - let mut used = RegisterSet::empty(); - for value_loc in func.locations.values() { - // Note that `value_loc` here contains only a single unit of a potentially multi-unit - // register. We don't use registers that overlap each other in the x86 ISA, but in others - // we do. So this should not be blindly reused. - if let ValueLoc::Reg(ru) = *value_loc { - if GPR.contains(ru) { - if !used.is_avail(GPR, ru) { - used.free(GPR, ru); - } - } else if FPR.contains(ru) { - if !used.is_avail(FPR, ru) { - used.free(FPR, ru); - } - } - } - } - - // regmove and regfill instructions may temporarily divert values into other registers, - // and these are not reflected in `func.locations`. Scan the function for such instructions - // and note which callee-saved registers they use. - // - // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible - // to avoid this step. - for block in &func.layout { - for inst in func.layout.block_insts(block) { - match func.dfg[inst] { - ir::instructions::InstructionData::RegMove { dst, .. } - | ir::instructions::InstructionData::RegFill { dst, .. } => { - if GPR.contains(dst) { - if !used.is_avail(GPR, dst) { - used.free(GPR, dst); - } - } else if FPR.contains(dst) { - if !used.is_avail(FPR, dst) { - used.free(FPR, dst); - } - } - } - _ => (), - } - } - } - - used.intersect(&all_callee_saved); - used -} - -pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - match func.signature.call_conv { - // For now, just translate fast and cold as system_v. 
- CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => { - system_v_prologue_epilogue(func, isa) - } - CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => { - fastcall_prologue_epilogue(func, isa) - } - CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => { - baldrdash_prologue_epilogue(func, isa) - } - CallConv::Probestack => unimplemented!("probestack calling convention"), - CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"), - CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), - } -} - -fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - debug_assert!( - !isa.flags().enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - - let word_size = StackSize::from(isa.pointer_bytes()); - let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - let bytes = - StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size; - - let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = Some(-(bytes as StackOffset)); - func.stack_slots.push(ss); - - let is_leaf = func.is_leaf(); - layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?; - Ok(()) -} - -/// Implementation of the fastcall-based Win64 calling convention described at [1] -/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention -fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - if isa.triple().pointer_width().unwrap() != PointerWidth::U64 { - panic!("TODO: windows-fastcall: x86-32 not implemented yet"); - } - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csrs = callee_saved_regs_used(isa, func); - let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32; - let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32; - let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size; - - // FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed - if fpsr_stack_size > 0 { - csr_stack_size = (csr_stack_size + 15) & !15; - } - - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size, - offset: Some(-(csr_stack_size as StackOffset)), - }); - - let is_leaf = func.is_leaf(); - - // If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space - if !is_leaf { - // TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::ExplicitSlot, - size: WIN_SHADOW_STACK_SPACE, - offset: None, - }); - } - - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? 
as i32; - - // Subtract the GPR saved register size from the local size because pushes are used for the saves - let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32); - - // Add CSRs to function signature - let reg_type = isa.pointer_type(); - let sp_arg_index = if fpsr_stack_size > 0 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for gp_csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - for fp_csr in csrs.iter(FPR) { - // The calling convention described in - // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires - // preserving the low 128 bits of XMM6-XMM15. - let csr_arg = - ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert a System V-compatible prologue and epilogue. -fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - let pointer_width = isa.triple().pointer_width().unwrap(); - let word_size = pointer_width.bytes() as usize; - - let csrs = callee_saved_regs_used(isa, func); - assert!( - csrs.iter(FPR).len() == 0, - "SysV ABI does not have callee-save SIMD registers" - ); - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32; - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size as u32, - offset: Some(-csr_stack_size), - }); - - let is_leaf = func.is_leaf(); - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32; - let local_stack_size = i64::from(total_stack_size - csr_stack_size); - - // Add CSRs to function signature - let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap(); - // On X86-32 all parameters, including vmctx, are passed on stack, and we need - // to extract vmctx from the stack before we can save the frame pointer. 
- let sp_arg_index = if isa.pointer_bits() == 32 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert the prologue for a given function. -/// This is used by common calling conventions such as System V. -fn insert_common_prologue( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - has_sp_param: bool, - isa: &dyn TargetIsa, -) { - let sp = if has_sp_param { - let block = pos.current_block().expect("missing block under cursor"); - let sp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit); - Some(sp) - } else { - None - }; - - // If this is a leaf function with zero stack, then there's no need to - // insert a stack check since it can't overflow anything and - // forward-progress is guarantee so long as loop are handled anyway. - // - // If this has a stack size it could stack overflow, or if it isn't a leaf - // it could be part of a long call chain which we need to check anyway. - // - // First we look for the stack limit as a special argument to the function, - // and failing that we see if a custom stack limit factory has been provided - // which will be used to likely calculate the stack limit from the arguments - // or perhaps constants. 
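    // [Editorial outline, not in the original source] Condensed, the block below does:
    //
    //     if stack_size > 0 || !func.is_leaf() {
    //         let limit = special_param(StackLimit)            // explicit argument wins; it is
    //             .or_else(|| func.stack_limit                 // copied into %rax, otherwise a
    //                 .map(|gv| interpret_gv(..)));            // `stack_limit` global value is used
    //         if let Some(limit) = limit {
    //             insert_stack_check(pos, stack_size, limit);  // compare %rsp against the limit
    //         }
    //     }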
- if stack_size > 0 || !pos.func.is_leaf() { - let scratch = ir::ValueLoc::Reg(RU::rax as RegUnit); - let stack_limit_arg = match pos.func.special_param(ArgumentPurpose::StackLimit) { - Some(arg) => { - let copy = pos.ins().copy(arg); - pos.func.locations[copy] = scratch; - Some(copy) - } - None => pos - .func - .stack_limit - .map(|gv| interpret_gv(pos, gv, sp, scratch)), - }; - if let Some(stack_limit_arg) = stack_limit_arg { - insert_stack_check(pos, stack_size, stack_limit_arg); - } - } - - // Append param to entry block - let block = pos.current_block().expect("missing block under cursor"); - let fp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - - pos.ins().x86_push(fp); - - let mov_sp_inst = pos - .ins() - .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit); - - let mut last_csr_push = None; - for reg in csrs.iter(GPR) { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, reg_type); - - // Assign it a location - pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - last_csr_push = Some(pos.ins().x86_push(csr_arg)); - } - - // Allocate stack frame storage. - let mut adjust_sp_inst = None; - if stack_size > 0 { - if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2()) - { - // Emit a stack probe. - let rax = RU::rax as RegUnit; - let rax_val = ir::ValueLoc::Reg(rax); - - // The probestack function expects its input in %rax. - let arg = pos.ins().iconst(reg_type, stack_size); - pos.func.locations[arg] = rax_val; - - // Call the probestack function. - let callee = get_probestack_funcref(pos.func, reg_type, rax, isa); - - // Make the call. - let call = if !isa.flags().is_pic() - && isa.triple().pointer_width().unwrap() == PointerWidth::U64 - && !pos.func.dfg.ext_funcs[callee].colocated - { - // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect. - // Use r11 as it may be clobbered under all supported calling conventions. - let r11 = RU::r11 as RegUnit; - let sig = pos.func.dfg.ext_funcs[callee].signature; - let addr = pos.ins().func_addr(reg_type, callee); - pos.func.locations[addr] = ir::ValueLoc::Reg(r11); - pos.ins().call_indirect(sig, addr, &[arg]) - } else { - // Otherwise just do a normal call. - pos.ins().call(callee, &[arg]) - }; - - // If the probestack function doesn't adjust sp, do it ourselves. - if !isa.flags().probestack_func_adjusts_sp() { - let result = pos.func.dfg.inst_results(call)[0]; - pos.func.locations[result] = rax_val; - adjust_sp_inst = Some(pos.ins().adjust_sp_down(result)); - } - } else { - // Simply decrement the stack pointer. - adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size))); - } - } - - // With the stack pointer adjusted, save any callee-saved floating point registers via offset - // FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes - let mut last_fpr_save = None; - - for (i, reg) in csrs.iter(FPR).enumerate() { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2); - - // Since regalloc has already run, we must assign a location. 
- pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - last_fpr_save = Some(pos.ins().store( - ir::MemFlags::trusted(), - csr_arg, - sp.expect("FPR save requires SP param"), - (stack_size - offset) as i32, - )); - } - - pos.func.prologue_end = Some( - last_fpr_save - .or(adjust_sp_inst) - .or(last_csr_push) - .unwrap_or(mov_sp_inst), - ); -} - -/// Inserts code necessary to calculate `gv`. -/// -/// Note that this is typically done with `ins().global_value(...)` but that -/// requires legalization to run to encode it, and we're running super late -/// here in the backend where legalization isn't possible. To get around this -/// we manually interpret the `gv` specified and do register allocation for -/// intermediate values. -/// -/// This is an incomplete implementation of loading `GlobalValue` values to get -/// compared to the stack pointer, but currently it serves enough functionality -/// to get this implemented in `wasmtime` itself. This'll likely get expanded a -/// bit over time! -fn interpret_gv( - pos: &mut EncCursor, - gv: ir::GlobalValue, - sp: Option, - scratch: ir::ValueLoc, -) -> ir::Value { - match pos.func.global_values[gv] { - ir::GlobalValueData::VMContext => { - let vmctx_index = pos - .func - .signature - .special_param_index(ir::ArgumentPurpose::VMContext) - .expect("no vmcontext parameter found"); - match pos.func.signature.params[vmctx_index] { - AbiParam { - location: ArgumentLoc::Reg(_), - .. - } => { - let entry = pos.func.layout.entry_block().unwrap(); - pos.func.dfg.block_params(entry)[vmctx_index] - } - AbiParam { - location: ArgumentLoc::Stack(offset), - value_type, - .. - } => { - let offset = - offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8)); - // The following access can be marked `trusted` because it is a load of an argument. We - // know it is safe because it was safe to write it in preparing this function call. - let ret = - pos.ins() - .load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset); - pos.func.locations[ret] = scratch; - return ret; - } - AbiParam { - location: ArgumentLoc::Unassigned, - .. - } => unreachable!(), - } - } - ir::GlobalValueData::Load { - base, - offset, - global_type, - readonly: _, - } => { - let base = interpret_gv(pos, base, sp, scratch); - let ret = pos - .ins() - .load(global_type, ir::MemFlags::trusted(), base, offset); - pos.func.locations[ret] = scratch; - return ret; - } - ref other => panic!("global value for stack limit not supported: {}", other), - } -} - -/// Insert a check that generates a trap if the stack pointer goes -/// below a value in `stack_limit_arg`. -fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) { - use crate::ir::condcodes::IntCC; - - // Our stack pointer, after subtracting `stack_size`, must not be below - // `stack_limit_arg`. To do this we're going to add `stack_size` to - // `stack_limit_arg` and see if the stack pointer is below that. The - // `stack_size + stack_limit_arg` computation might overflow, however, due - // to how stack limits may be loaded and set externally to trigger a trap. - // - // To handle this we'll need an extra comparison to see if the stack - // pointer is already below `stack_limit_arg`. 
Most of the time this - // isn't necessary though since the stack limit which triggers a trap is - // likely a sentinel somewhere around `usize::max_value()`. In that case - // only conditionally emit this pre-flight check. That way most functions - // only have the one comparison, but are also guaranteed that if we add - // `stack_size` to `stack_limit_arg` is won't overflow. - // - // This does mean that code generators which use this stack check - // functionality need to ensure that values stored into the stack limit - // will never overflow if this threshold is added. - if stack_size >= 32 * 1024 { - let cflags = pos.ins().ifcmp_sp(stack_limit_arg); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); - } - - // Copy `stack_limit_arg` into a %rax and use it for calculating - // a SP threshold. - let sp_threshold = pos.ins().iadd_imm(stack_limit_arg, stack_size); - pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit); - - // If the stack pointer currently reaches the SP threshold or below it then after opening - // the current stack frame, the current stack pointer will reach the limit. - let cflags = pos.ins().ifcmp_sp(sp_threshold); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); -} - -/// Find all `return` instructions and insert epilogues before them. -fn insert_common_epilogues( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - while let Some(block) = pos.next_block() { - pos.goto_last_inst(block); - if let Some(inst) = pos.current_inst() { - if pos.func.dfg[inst].opcode().is_return() { - insert_common_epilogue(inst, block, stack_size, pos, reg_type, csrs, sp_arg_index); - } - } - } -} - -/// Insert an epilogue given a specific `return` instruction. -/// This is used by common calling conventions such as System V. 
-fn insert_common_epilogue( - inst: ir::Inst, - block: ir::Block, - stack_size: i64, - pos: &mut EncCursor, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - // Insert the pop of the frame pointer - let fp_pop = pos.ins().x86_pop(reg_type); - let fp_pop_inst = pos.prev_inst().unwrap(); - pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - pos.func.dfg.append_inst_arg(inst, fp_pop); - - // Insert the CSR pops - let mut first_csr_pop_inst = None; - for reg in csrs.iter(GPR) { - let csr_pop = pos.ins().x86_pop(reg_type); - first_csr_pop_inst = pos.prev_inst(); - assert!(first_csr_pop_inst.is_some()); - pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, csr_pop); - } - - // Insert the adjustment of SP - let mut sp_adjust_inst = None; - if stack_size > 0 { - pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)); - sp_adjust_inst = pos.prev_inst(); - assert!(sp_adjust_inst.is_some()); - } - - let mut first_fpr_load = None; - if let Some(index) = sp_arg_index { - let sp = pos - .func - .dfg - .block_params(pos.func.layout.entry_block().unwrap())[index]; - - // Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads) - for (i, reg) in csrs.iter(FPR).enumerate() { - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - let value = pos.ins().load( - types::F64X2, - ir::MemFlags::trusted(), - sp, - (stack_size - offset) as i32, - ); - - first_fpr_load.get_or_insert(pos.current_inst().expect("current inst")); - - pos.func.locations[value] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, value); - } - } else { - assert!(csrs.iter(FPR).len() == 0); - } - - pos.func.epilogues_start.push(( - first_fpr_load - .or(sp_adjust_inst) - .or(first_csr_pop_inst) - .unwrap_or(fp_pop_inst), - block, - )); -} - -#[cfg(feature = "unwind")] -pub fn create_unwind_info( - func: &ir::Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - use crate::isa::unwind::UnwindInfo; - use crate::machinst::UnwindInfoKind; - - // Assumption: RBP is being used as the frame pointer for both calling conventions - // In the future, we should be omitting frame pointer as an optimization, so this will change - Ok(match isa.unwind_info_kind() { - UnwindInfoKind::SystemV => { - super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u)) - } - UnwindInfoKind::Windows => { - super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) - } - UnwindInfoKind::None => None, - }) -} diff --git a/cranelift/codegen/src/isa/legacy/x86/binemit.rs b/cranelift/codegen/src/isa/legacy/x86/binemit.rs deleted file mode 100644 index 0480873672..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/binemit.rs +++ /dev/null @@ -1,578 +0,0 @@ -//! Emitting binary x86 machine code. 
- -use super::enc_tables::{needs_offset, needs_sib_byte}; -use super::registers::RU; -use crate::binemit::{bad_encoding, CodeSink, Reloc}; -use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::{ - Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode, - TrapCode, -}; -use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa}; -use crate::regalloc::RegDiversions; -use cranelift_codegen_shared::isa::x86::EncodingBits; - -include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs")); - -// Convert a stack base to the corresponding register. -fn stk_base(base: StackBase) -> RegUnit { - let ru = match base { - StackBase::SP => RU::rsp, - StackBase::FP => RU::rbp, - StackBase::Zone => unimplemented!(), - }; - ru as RegUnit -} - -// Mandatory prefix bytes for Mp* opcodes. -const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2]; - -// Second byte for three-byte opcodes for mm=0b10 and mm=0b11. -const OP3_BYTE2: [u8; 2] = [0x38, 0x3a]; - -// A REX prefix with no bits set: 0b0100WRXB. -const BASE_REX: u8 = 0b0100_0000; - -// Create a single-register REX prefix, setting the B bit to bit 3 of the register. -// This is used for instructions that encode a register in the low 3 bits of the opcode and for -// instructions that use the ModR/M `reg` field for something else. -fn rex1(reg_b: RegUnit) -> u8 { - let b = ((reg_b >> 3) & 1) as u8; - BASE_REX | b -} - -// Create a dual-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - BASE_REX | b | (r << 2) -} - -// Create a three-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -// REX.X = bit 3 of SIB index register. -fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - let x = ((index >> 3) & 1) as u8; - BASE_REX | b | (x << 1) | (r << 2) -} - -/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1 -/// in the Intel Software Development Manual, volume 2A. These bits can be used by different -/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one. -fn evex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = (!(rm >> 3) & 1) as u8; - let x = (!(rm >> 4) & 1) as u8; - let r = (!(reg >> 3) & 1) as u8; - let r_ = (!(reg >> 4) & 1) as u8; - 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) -} - -/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits -/// 3:0 correspond to WRXB. W allows certain instructions to declare a 64-bit operand size; because -/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in -/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it -/// always does and should be encoded like `.rex().w()`. The RXB are extension of ModR/M or SIB -/// fields; see section 2.2.1.2 in the Intel Software Development Manual. -#[inline] -fn needs_rex(rex: u8) -> bool { - rex != BASE_REX -} - -// Emit a REX prefix. -// -// The R, X, and B bits are computed from registers using the functions above. The W bit is -// extracted from `bits`. 
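For reference, the REX byte layout computed by the helpers above is 0b0100WRXB; a minimal standalone sketch of the two-register case (illustrative `rex2`, hardware register numbers 0..15):

// Register numbers follow the hardware encoding: rax=0 .. rdi=7, r8=8 .. r15=15.
const BASE_REX: u8 = 0b0100_0000;

fn rex2(rm: u8, reg: u8, w: bool) -> u8 {
    let b = (rm >> 3) & 1;  // REX.B extends the ModR/M r/m field
    let r = (reg >> 3) & 1; // REX.R extends the ModR/M reg field
    BASE_REX | b | (r << 2) | ((w as u8) << 3)
}

fn main() {
    // `add r8, rdx` needs REX.B for r8 and REX.W for the 64-bit operand size: 0x49.
    assert_eq!(rex2(8, 2, true), 0x49);
    // Low registers and a 32-bit operand size degenerate to 0x40, which would
    // normally be omitted entirely.
    assert_eq!(rex2(0, 1, false), 0x40);
}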
-fn rex_prefix(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(rex & 0xf8, BASE_REX); - let w = EncodingBits::from(bits).rex_w(); - sink.put1(rex | (w << 3)); -} - -// Emit a single-byte opcode with no REX prefix. -fn put_op1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding"); - sink.put1(bits as u8); -} - -// Emit a single-byte opcode with REX prefix. -fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*"); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -/// Emit a single-byte opcode with inferred REX prefix. -fn put_dynrexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*"); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX -fn put_op2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX with REX prefix. -fn put_rexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*"); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode: 0F XX with inferred REX prefix. -fn put_dynrexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0f00, - 0x0400, - "Invalid encoding bits for DynRexOp2*" - ); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix. -fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding"); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix and REX. -fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix. -fn put_mp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix and REX. -fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX. 
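The `put_*mp*` helpers above all emit bytes in the same order: mandatory prefix, optional REX, the 0x0F escape, then the opcode. A sketch of that ordering against a plain Vec<u8> (illustrative names, not a real CodeSink):

fn put_mp2_like(prefix: u8, rex: Option<u8>, opcode: u8, sink: &mut Vec<u8>) {
    sink.push(prefix); // mandatory 0x66/0xF3/0xF2 prefix comes first
    if let Some(rex) = rex {
        sink.push(rex); // REX, when present, sits between the prefix and 0x0F
    }
    sink.push(0x0f);
    sink.push(opcode);
}

fn main() {
    let mut sink = Vec::new();
    // e.g. the opcode bytes of MOVQ xmm, r/m64 are 66 REX.W 0F 6E /r.
    put_mp2_like(0x66, Some(0x48), 0x6e, &mut sink);
    assert_eq!(sink, [0x66, 0x48, 0x0f, 0x6e]);
}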
-fn put_dynrexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0c00, - 0x0400, - "Invalid encoding bits for DynRexMp2*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix. -fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX -fn put_rexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix. -fn put_dynrexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0800, - 0x0800, - "Invalid encoding bits for DynRexMp3*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in -/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be -/// used together for certain classes of instructions; i.e., special care should be taken to ensure -/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where -/// opcodes can result in an #UD. -#[allow(dead_code)] -enum EvexContext { - RoundingRegToRegFP { - rc: EvexRoundingControl, - }, - NoRoundingFP { - sae: bool, - length: EvexVectorLength, - }, - MemoryOp { - broadcast: bool, - length: EvexVectorLength, - }, - Other { - length: EvexVectorLength, - }, -} - -impl EvexContext { - /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1, - Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1, - Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1, - Self::Other { length } => length.bits() << 1, - } - } -} - -/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexVectorLength { - V128, - V256, - V512, -} - -impl EvexVectorLength { - /// Encode the `L'` and `L` bits for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::V128 => 0b00, - Self::V256 => 0b01, - Self::V512 => 0b10, - // 0b11 is reserved (#UD). - } - } -} - -/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexRoundingControl { - RNE, - RD, - RU, - RZ, -} - -impl EvexRoundingControl { - /// Encode the `L'` and `L` bits for merging with the P2 byte. 
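A small sketch of how these length/rounding bits end up in bits 6:4 of the P2 byte (the shift by four happens in `put_evex` below; the function name here is illustrative):

// [ L' L b ] -> bits 6:4 of P2, for the vector-length/broadcast contexts above.
fn p2_len_bits(length_bits: u8, broadcast: bool) -> u8 {
    ((length_bits & 0b11) << 1 | broadcast as u8) << 4
}

fn main() {
    // 256-bit vector length (L'L = 01), no broadcast.
    assert_eq!(p2_len_bits(0b01, false), 0b010_0000);
    // 128-bit length with embedded broadcast sets the b bit.
    assert_eq!(p2_len_bits(0b00, true), 0b001_0000);
}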
- fn bits(&self) -> u8 { - match self { - Self::RNE => 0b00, - Self::RD => 0b01, - Self::RU => 0b10, - Self::RZ => 0b11, - } - } -} - -/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel -/// Software Development Manual, volume 2A. -#[allow(dead_code)] -enum EvexMasking { - None, - Merging { k: u8 }, - Zeroing { k: u8 }, -} - -impl EvexMasking { - /// Encode the `z` bit for merging with the P2 byte. - fn z_bit(&self) -> u8 { - match self { - Self::None | Self::Merging { .. } => 0, - Self::Zeroing { .. } => 1, - } - } - - /// Encode the `aaa` bits for merging with the P2 byte. - fn aaa_bits(&self) -> u8 { - match self { - Self::None => 0b000, - Self::Merging { k } | Self::Zeroing { k } => { - debug_assert!(*k <= 7); - *k - } - } - } -} - -/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe -/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function -/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are -/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring -/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix). -fn put_evex( - bits: u16, - reg: RegUnit, - vvvvv: RegUnit, - rm: RegUnit, - context: EvexContext, - masking: EvexMasking, - sink: &mut CS, -) { - let enc = EncodingBits::from(bits); - - // EVEX prefix. - sink.put1(0x62); - - debug_assert!(enc.mm() < 0b100); - let mut p0 = enc.mm() & 0b11; - p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset - sink.put1(p0); - - let mut p1 = enc.pp() | 0b100; // bit 2 is always set - p1 |= (!(vvvvv as u8) & 0b1111) << 3; - p1 |= (enc.rex_w() & 0b1) << 7; - sink.put1(p1); - - let mut p2 = masking.aaa_bits(); - p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3; - p2 |= context.bits() << 4; - p2 |= masking.z_bit() << 7; - sink.put1(p2); - - // Opcode - sink.put1(enc.opcode_byte()); - - // ModR/M byte placed in recipe -} - -/// Emit a ModR/M byte for reg-reg operands. -fn modrm_rr(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a ModR/M byte where the reg bits are part of the opcode. -fn modrm_r_bits(rm: RegUnit, bits: u16, sink: &mut CS) { - let reg = (bits >> 12) as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset. -/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an -/// absolute immediate 32-bit address. -fn modrm_rm(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b00000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address -/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual -/// section 2.2.1.6. -fn modrm_riprel(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b101, reg, sink) -} - -/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. 
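The ModR/M helpers above and below all pack the same three fields, [mod:2][reg:3][rm:3]; a standalone sketch with a couple of spot checks:

fn modrm(mode: u8, reg: u8, rm: u8) -> u8 {
    debug_assert!(mode < 4);
    (mode << 6) | ((reg & 7) << 3) | (rm & 7)
}

fn main() {
    // Mode 11: register-to-register, e.g. the C8 in `add eax, ecx` (01 C8, reg=ecx, rm=eax).
    assert_eq!(modrm(0b11, 1, 0), 0xc8);
    // Mode 01: [rcx + disp8] with reg = edx.
    assert_eq!(modrm(0b01, 2, 1), 0x51);
}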
-fn modrm_disp8(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b01000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. -fn modrm_disp32(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b10000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present. -fn modrm_sib(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b100, reg, sink); -} - -/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit -/// displacement are present. -fn modrm_sib_disp8(reg: RegUnit, sink: &mut CS) { - modrm_disp8(0b100, reg, sink); -} - -/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit -/// displacement are present. -fn modrm_sib_disp32(reg: RegUnit, sink: &mut CS) { - modrm_disp32(0b100, reg, sink); -} - -/// Emit a SIB byte with a base register and no scale+index. -fn sib_noindex(base: RegUnit, sink: &mut CS) { - let base = base as u8 & 7; - // SIB SS_III_BBB. - let mut b = 0b00_100_000; - b |= base; - sink.put1(b); -} - -/// Emit a SIB byte with a scale, base, and index. -fn sib(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) { - // SIB SS_III_BBB. - debug_assert_eq!(scale & !0x03, 0, "Scale out of range"); - let scale = scale & 3; - let index = index as u8 & 7; - let base = base as u8 & 7; - let b: u8 = (scale << 6) | (index << 3) | base; - sink.put1(b); -} - -/// Get the low 4 bits of an opcode for an integer condition code. -/// -/// Add this offset to a base opcode for: -/// -/// ---- 0x70: Short conditional branch. -/// 0x0f 0x80: Long conditional branch. -/// 0x0f 0x90: SetCC. -/// -fn icc2opc(cond: IntCC) -> u16 { - use crate::ir::condcodes::IntCC::*; - match cond { - Overflow => 0x0, - NotOverflow => 0x1, - UnsignedLessThan => 0x2, - UnsignedGreaterThanOrEqual => 0x3, - Equal => 0x4, - NotEqual => 0x5, - UnsignedLessThanOrEqual => 0x6, - UnsignedGreaterThan => 0x7, - // 0x8 = Sign. - // 0x9 = !Sign. - // 0xa = Parity even. - // 0xb = Parity odd. - SignedLessThan => 0xc, - SignedGreaterThanOrEqual => 0xd, - SignedLessThanOrEqual => 0xe, - SignedGreaterThan => 0xf, - } -} - -/// Get the low 4 bits of an opcode for a floating point condition code. -/// -/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: -/// -/// ZPC OSA -/// UN 111 000 -/// GT 000 000 -/// LT 001 000 -/// EQ 100 000 -/// -/// Not all floating point condition codes are supported. -fn fcc2opc(cond: FloatCC) -> u16 { - use crate::ir::condcodes::FloatCC::*; - match cond { - Ordered => 0xb, // EQ|LT|GT => *np (P=0) - Unordered => 0xa, // UN => *p (P=1) - OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0), - UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1) - GreaterThan => 0x7, // GT => *a (C=0&Z=0) - GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0) - UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1) - UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported", cond), - } -} - -/// Emit a single-byte branch displacement to `destination`. 
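The condition-code tables above feed directly into opcode selection: the nibble from `icc2opc` is added to 0x70 for a short Jcc, to 0x0F 0x80 for the near form, and to 0x0F 0x90 for SETcc, and the rel8 operand is measured from the end of the instruction. A standalone sketch (illustrative names):

fn short_jcc(cc_nibble: u8, branch_offset: u32, target_offset: u32) -> [u8; 2] {
    let opcode = 0x70 + cc_nibble;
    // rel8 is relative to the end of the 2-byte instruction.
    let rel8 = target_offset.wrapping_sub(branch_offset + 2) as u8;
    [opcode, rel8]
}

fn main() {
    // JE is 0x74 (0x70 + 0x4); a branch at offset 0 targeting offset 0x12
    // encodes a displacement of 0x10.
    assert_eq!(short_jcc(0x4, 0, 0x12), [0x74, 0x10]);
}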
-fn disp1(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1); - sink.put1(delta as u8); -} - -/// Emit a four-byte branch displacement to `destination`. -fn disp4(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4); - sink.put4(delta); -} - -/// Emit a four-byte displacement to jump table `jt`. -fn jt_disp4(jt: JumpTable, func: &Function, sink: &mut CS) { - let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_jt(Reloc::X86PCRelRodata4, jt); -} - -/// Emit a four-byte displacement to `constant`. -fn const_disp4(constant: Constant, func: &Function, sink: &mut CS) { - let offset = func.dfg.constants.get_offset(constant); - let delta = offset.wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_constant(Reloc::X86PCRelRodata4, offset); -} diff --git a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs b/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs deleted file mode 100644 index 72890cffd9..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs +++ /dev/null @@ -1,1894 +0,0 @@ -//! Encoding tables for x86 ISAs. - -use super::registers::*; -use crate::bitset::BitSet; -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::condcodes::{FloatCC, IntCC}; -use crate::ir::types::*; -use crate::ir::{self, Function, Inst, InstBuilder, MemFlags}; -use crate::isa::constraints::*; -use crate::isa::enc_tables::*; -use crate::isa::encoding::base_size; -use crate::isa::encoding::{Encoding, RecipeSizing}; -use crate::isa::RegUnit; -use crate::isa::{self, TargetIsa}; -use crate::legalizer::expand_as_libcall; -use crate::predicates; -use crate::regalloc::RegDiversions; - -include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); -include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); - -/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB). -/// -/// Normal x86 instructions have only 3 bits for encoding a register. -/// The REX prefix adds REX.R, REX,X, and REX.B bits, interpreted as fourth bits. -pub fn is_extended_reg(reg: RegUnit) -> bool { - // Extended registers have the fourth bit set. 
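In the hardware numbering used here, rax..rdi are 0..7 and r8..r15 are 8..15, so the test below is just bit 3; a standalone spot check (plain register numbers, not Cranelift RegUnits):

fn is_extended(hw_reg: u8) -> bool {
    hw_reg & 0b1000 != 0 // r8..r15 and xmm8..xmm15 force a REX.R/X/B bit
}

fn main() {
    assert!(!is_extended(0));  // rax
    assert!(!is_extended(7));  // rdi
    assert!(is_extended(8));   // r8
    assert!(is_extended(15));  // r15
}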
- reg as u8 & 0b1000 != 0 -} - -pub fn needs_sib_byte(reg: RegUnit) -> bool { - reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit -} -pub fn needs_offset(reg: RegUnit) -> bool { - reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit -} -pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool { - needs_sib_byte(reg) || needs_offset(reg) -} - -fn test_input( - op_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); - condition_func(in_reg) -} - -fn test_result( - result_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations); - condition_func(out_reg) -} - -fn size_plus_maybe_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(0, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(1, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(0, inst, divert, func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(1, inst, divert, func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset. -fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB. 
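All of these sizing callbacks follow one pattern: start from the recipe's base size and add one byte per condition that holds (a SIB byte for rsp/r12 bases, a disp8 for rbp/r13, a dynamic REX when any operand register is extended). A condensed model with a hypothetical base size of 2:

fn needs_sib_byte(base: u8) -> bool { base == 4 || base == 12 } // rsp, r12
fn needs_offset(base: u8) -> bool { base == 5 || base == 13 }   // rbp, r13
fn is_extended(reg: u8) -> bool { reg & 0b1000 != 0 }

fn load_size(base_size: u8, base: u8, dst: u8) -> u8 {
    let mut size = base_size;
    if needs_sib_byte(base) || needs_offset(base) { size += 1 } // extra SIB or disp8 byte
    if is_extended(base) || is_extended(dst) { size += 1 }      // inferred REX prefix
    size
}

fn main() {
    assert_eq!(load_size(2, 1, 0), 2);  // [rcx] -> eax: no extra bytes
    assert_eq!(load_size(2, 4, 0), 3);  // [rsp] needs a SIB byte
    assert_eq!(load_size(2, 13, 9), 4); // [r13] needs a disp8 and a REX prefix
}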
-fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB or offset. -fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB. -fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. -fn size_with_inferred_rex_for_inreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the second operand. -fn size_with_inferred_rex_for_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the third operand. -fn size_with_inferred_rex_for_inreg2( - sizing: &RecipeSizing, - _: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. 
- let needs_rex = test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. -fn size_with_inferred_rex_for_inreg0_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand. -fn size_with_inferred_rex_for_inreg1_inreg2( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single -/// input register and a single output register. -fn size_with_inferred_rex_for_inreg0_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single output register. -fn size_with_inferred_rex_for_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV. -/// -/// CMOV uses 3 inputs, with the REX is inferred from reg1 and reg2. -fn size_with_inferred_rex_for_cmov( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// If the value's definition is a constant immediate, returns its unpacked value, or None -/// otherwise. -fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option { - if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) { - if let ir::InstructionData::UnaryImm { - opcode: ir::Opcode::Iconst, - imm, - } = &pos.func.dfg[*inst] - { - let value: i64 = (*imm).into(); - Some(value) - } else { - None - } - } else { - None - } -} - -/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. 
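For context, the expansion below exists because x86 `idiv` faults on division by zero and on INT_MIN / -1, while CLIF requires explicit traps and defines `srem x, -1` as 0. A scalar model of the required i32 semantics (this is the target behavior, not the Cranelift code):

#[derive(Debug, PartialEq)]
enum Trap { DivByZero, IntegerOverflow }

fn sdiv32(x: i32, y: i32) -> Result<i32, Trap> {
    if y == 0 { return Err(Trap::DivByZero); }
    if x == i32::MIN && y == -1 { return Err(Trap::IntegerOverflow); }
    Ok(x / y) // the quotient half of x86_sdivmodx
}

fn srem32(x: i32, y: i32) -> Result<i32, Trap> {
    if y == 0 { return Err(Trap::DivByZero); }
    if y == -1 { return Ok(0); } // i32::MIN % -1 must not trap; it is defined as 0
    Ok(x % y)
}

fn main() {
    assert_eq!(sdiv32(i32::MIN, -1), Err(Trap::IntegerOverflow));
    assert_eq!(srem32(i32::MIN, -1), Ok(0));
    assert_eq!(srem32(-7, 2), Ok(-1)); // remainder keeps the dividend's sign
}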
-fn expand_sdivrem( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_srem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Sdiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Srem, - args, - } => (args[0], args[1], true), - _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - let avoid_div_traps = isa.flags().avoid_div_traps(); - - // If we can tolerate native division traps, sdiv doesn't need branching. - if !avoid_div_traps && !is_srem { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - pos.ins().with_result(result).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // Try to remove checks if the input value is an immediate other than 0 or -1. For these two - // immediates, we'd ideally replace conditional traps by traps, but this requires more - // manipulation of the dfg/cfg, which is out of scope here. - let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) { - (imm == 0, imm == -1) - } else { - (true, true) - }; - - // Put in an explicit division-by-zero trap if the environment requires it. - if avoid_div_traps && could_be_zero { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - - if !could_be_minus_one { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let reuse = if is_srem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // block handling the nominal case. - let nominal = pos.func.dfg.make_block(); - - // block handling the -1 divisor case. - let minus_one = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.attach_block_param(done, result); - - // Start by checking for a -1 divisor which needs to be handled specially. - let is_m1 = pos.ins().ifcmp_imm(y, -1); - pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); - pos.ins().jump(nominal, &[]); - - // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division - // by zero. - pos.insert_block(nominal); - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); - let divres = if is_srem { rem } else { quot }; - pos.ins().jump(done, &[divres]); - - // Now deal with the -1 divisor case. - pos.insert_block(minus_one); - let m1_result = if is_srem { - // x % -1 = 0. - pos.ins().iconst(ty, 0) - } else { - // Explicitly check for overflow: Trap when x == INT_MIN. - debug_assert!(avoid_div_traps, "Native trapping divide handled above"); - let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1)); - pos.ins() - .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow); - // x / -1 = -x. - pos.ins().irsub_imm(x, 0) - }; - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[m1_result]); - - // Finally insert a label for the completion. 
- pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, nominal); - cfg.recompute_block(pos.func, minus_one); - cfg.recompute_block(pos.func, done); -} - -/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`. -fn expand_udivrem( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_urem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Udiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Urem, - args, - } => (args[0], args[1], true), - _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)), - }; - let avoid_div_traps = isa.flags().avoid_div_traps(); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - // Put in an explicit division-by-zero trap if the environment requires it. - if avoid_div_traps { - let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) { - // Ideally, we'd just replace the conditional trap with a trap when the immediate is - // zero, but this requires more manipulation of the dfg/cfg, which is out of scope - // here. - imm == 0 - } else { - true - }; - if zero_check { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - } - - // Now it is safe to execute the `x86_udivmodx` instruction. - let xhi = pos.ins().iconst(ty, 0); - let reuse = if is_urem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y); - pos.remove_inst(); -} - -/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax` -/// instructions. -fn expand_minmax( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - - // We need to handle the following conditions, depending on how x and y compare: - // - // 1. LT or GT: The native `x86_opc` min/max instruction does what we need. - // 2. EQ: We need to use `bitwise_opc` to make sure that - // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. - // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. - - // block handling case 1) where operands are ordered but not equal. - let one_block = func.dfg.make_block(); - - // block handling case 3) where one operand is NaN. - let uno_block = func.dfg.make_block(); - - // block that handles the unordered or equal cases 2) and 3). - let ueq_block = func.dfg.make_block(); - - // block handling case 2) where operands are ordered and equal. - let eq_block = func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = func.dfg.make_block(); - - // The basic blocks are laid out to minimize branching for the common cases: - // - // 1) One branch not taken, one jump. - // 2) One branch taken. - // 3) Two branches taken, one jump. 
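A scalar model of the case analysis above, including the bitwise trick used for the signed-zero tie (bor for fmin; fmax would use band), with illustrative names:

fn fmin_model(x: f64, y: f64) -> f64 {
    if x.is_nan() || y.is_nan() {
        x + y // case 3: propagate a quiet NaN, as the fadd in the expansion does
    } else if x == y {
        f64::from_bits(x.to_bits() | y.to_bits()) // case 2: makes fmin(0.0, -0.0) == -0.0
    } else if x < y {
        x // case 1: ordered and not equal
    } else {
        y
    }
}

fn main() {
    assert_eq!(fmin_model(0.0, -0.0).to_bits(), (-0.0f64).to_bits());
    assert_eq!(fmin_model(-1.5, 2.0), -1.5);
    assert!(fmin_model(f64::NAN, 1.0).is_nan());
}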
- - // Move the `inst` result value onto the `done` block. - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - // Test for case 1) ordered and not equal. - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); - pos.ins().brnz(cmp_ueq, ueq_block, &[]); - pos.ins().jump(one_block, &[]); - - // Handle the common ordered, not equal (LT|GT) case. - pos.insert_block(one_block); - let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; - let one_result = pos.func.dfg.first_result(one_inst); - pos.ins().jump(done, &[one_result]); - - // Case 3) Unordered. - // We know that at least one operand is a NaN that needs to be propagated. We simply use an - // `fadd` instruction which has the same NaN propagation semantics. - pos.insert_block(uno_block); - let uno_result = pos.ins().fadd(x, y); - pos.ins().jump(done, &[uno_result]); - - // Case 2) or 3). - pos.insert_block(ueq_block); - // Test for case 3) (UN) one value is NaN. - // TODO: When we get support for flag values, we can reuse the above comparison. - let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); - pos.ins().brnz(cmp_uno, uno_block, &[]); - pos.ins().jump(eq_block, &[]); - - // We are now in case 2) where x and y compare EQ. - // We need a bitwise operation to get the sign right. - pos.insert_block(eq_block); - let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; - let bw_result = pos.func.dfg.first_result(bw_inst); - // This should become a fall-through for this second most common case. - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[bw_result]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, one_block); - cfg.recompute_block(pos.func, uno_block); - cfg.recompute_block(pos.func, ueq_block); - cfg.recompute_block(pos.func, eq_block); - cfg.recompute_block(pos.func, done); -} - -/// This legalization converts a minimum/maximum operation into a sequence that matches the -/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from -/// [expand_minmax] above (the scalar version) for code clarity. -fn expand_minmax_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let ty = func.dfg.ctrl_typevar(inst); - debug_assert!(ty.is_vector()); - let (x, y, x86_opcode, is_max) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, true), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in - // either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0} - // it returns the second operand. To match the behavior of "return the minimum of the - // operands or a canonical NaN if either operand is NaN," we must compare in both - // directions. 
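The asymmetry being worked around can be modelled in plain Rust: x86 MINSS/MINPS return the second operand whenever the comparison is unordered or the operands compare equal, so the operation is not commutative in exactly the NaN and signed-zero cases:

// Scalar model of the x86 MINSS rule: if the comparison is unordered (NaN) or
// the operands compare equal (+0.0 vs -0.0), the second operand is returned.
fn x86_min(a: f32, b: f32) -> f32 {
    if a < b { a } else { b }
}

fn main() {
    assert_eq!(x86_min(0.0, -0.0).to_bits(), (-0.0f32).to_bits());
    assert_eq!(x86_min(-0.0, 0.0).to_bits(), (0.0f32).to_bits());
    assert!(x86_min(1.0, f32::NAN).is_nan()); // NaN in the second operand survives
    assert_eq!(x86_min(f32::NAN, 1.0), 1.0);  // ...but not in the first
}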
- let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y); - let forward = dfg.first_result(forward_inst); - let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x); - let backward = dfg.first_result(backward_inst); - - let (value, mask) = if is_max { - // For maximum: - // Find any differences between the forward and backward `max` operation. - let difference = pos.ins().bxor(forward, backward); - // Merge in the differences. - let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference); - let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference); - // Discover which lanes have NaNs in them. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value); - (value, find_nan_lanes_mask) - } else { - // For minimum: - // If either lane is a NaN, we want to use these bits, not the second operand bits. - let propagate_nans = pos.ins().bor(backward, forward); - // Find which lanes contain a NaN with an unordered comparison, filling the mask with - // 1s. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans); - let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask); - // Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs - // along this code path to be quieted and negative: after the upcoming shift and and_not, - // all upper bits (sign, exponent, and payload MSB) will be 1s. - let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask); - (tmp, bitcast_find_nan_lanes_mask) - }; - - // During this lowering we will need to know how many bits to shift by and what type to - // convert to when using an integer shift. Recall that an IEEE754 number looks like: - // `[sign bit] [exponent bits] [significand bits]` - // A quiet NaN has all exponent bits set to 1 and the most significant bit of the - // significand set to 1; a signaling NaN has the same exponent but the MSB of the - // significand is set to 0. The payload of the NaN is the remaining significand bits, and - // WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this - // canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB - // sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all - // NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign - // non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0 - let (shift_by, ty_as_int) = match ty { - F32X4 => (10, I32X4), - F64X2 => (13, I64X2), - _ => unimplemented!("this legalization only understands 128-bit floating point types"), - }; - - // In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all - // 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has - // little effect. - let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask); - let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by); - let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask); - - // Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is - // equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits, - // clearing the payload. - pos.func - .dfg - .replace(inst) - .band_not(value, shift_mask_as_float); -} - -/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to -/// i64 with a pattern, the rest needs more code. 
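For a 64-bit input with its top bit set, the expansion below halves the value (folding the dropped low bit back in so the rounding decision is preserved), converts it as a signed integer, and doubles the result. The same trick can be spot-checked in plain Rust:

fn u64_to_f64_via_signed(x: u64) -> f64 {
    if (x as i64) >= 0 {
        x as i64 as f64
    } else {
        let half = (x >> 1) | (x & 1); // keep the low bit sticky
        let f = half as i64 as f64;
        f + f
    }
}

fn main() {
    for &x in &[0u64, 1, u64::MAX, u64::MAX - 1, 1 << 63, (1 << 63) + 1, 0x8000_0000_0000_0400] {
        assert_eq!(u64_to_f64_via_signed(x), x as f64);
    }
}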
-/// -/// Note that this is the scalar implementation; for the vector implemenation see -/// [expand_fcvt_from_uint_vector]. -fn expand_fcvt_from_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let x; - match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } => x = arg, - _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)), - } - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Conversion from an unsigned int smaller than 64bit is easy on x86-64. - match xty { - ir::types::I8 | ir::types::I16 | ir::types::I32 => { - // TODO: This should be guarded by an ISA check. - let wide = pos.ins().uextend(ir::types::I64, x); - pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide); - return; - } - ir::types::I64 => {} - _ => unimplemented!(), - } - - let old_block = pos.func.layout.pp_block(inst); - - // block handling the case where x >= 0. - let poszero_block = pos.func.dfg.make_block(); - - // block handling the case where x < 0. - let neg_block = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.clear_results(inst); - pos.func.dfg.attach_block_param(done, result); - - // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction. - let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0); - pos.ins().brnz(is_neg, neg_block, &[]); - pos.ins().jump(poszero_block, &[]); - - // Easy case: just use a signed conversion. - pos.insert_block(poszero_block); - let posres = pos.ins().fcvt_from_sint(ty, x); - pos.ins().jump(done, &[posres]); - - // Now handle the negative case. - pos.insert_block(neg_block); - - // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it - // back up on the FP side. - let ihalf = pos.ins().ushr_imm(x, 1); - let lsb = pos.ins().band_imm(x, 1); - let ifinal = pos.ins().bor(ihalf, lsb); - let fhalf = pos.ins().fcvt_from_sint(ty, ifinal); - let negres = pos.ins().fadd(fhalf, fhalf); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[negres]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, poszero_block); - cfg.recompute_block(pos.func, neg_block); - cfg.recompute_block(pos.func, done); -} - -/// To convert packed unsigned integers to their float equivalents, we must legalize to a special -/// AVX512 instruction (using MCSR rounding) or use a long sequence of instructions. This logic is -/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform -/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be -/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
-fn expand_fcvt_from_uint_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == F32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4); - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. - pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg); - } else { - // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR, - // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS - let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg); - let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into()); - let zero = pos.ins().vconst(I16X8, zero_constant); - let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55); - let bitcast_low = pos.ins().raw_bitcast(I32X4, low); - let high = pos.ins().isub(arg, bitcast_low); - let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low); - let shift_high = pos.ins().ushr_imm(high, 1); - let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high); - let double_high = pos.ins().fadd(convert_high, convert_high); - pos.func.dfg.replace(inst).fadd(double_high, convert_low); - } - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_sint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSint, - arg, - } => arg, - _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // block for checking failure cases. - let maybe_trap_block = func.dfg.make_block(); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. - // It produces an INT_MIN result instead. - func.dfg.replace(inst).x86_cvtt2si(ty, x); - - let mut pos = FuncCursor::new(func).after_inst(inst); - pos.use_srcloc(inst); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done, &[]); - pos.ins().jump(maybe_trap_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. The input was NaN -> trap bad_toint - // 3. The input was out of range -> trap int_ovf - // - pos.insert_block(maybe_trap_block); - - // Check for NaN. - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins() - .trapnz(is_nan, ir::TrapCode::BadConversionToInteger); - - // Check for case 1: INT_MIN is the correct result. - // Determine the smallest floating point number that would convert to INT_MIN. 
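The hardware fact driving this expansion is that CVTTSS2SI/CVTTSD2SI never fault: NaN and out-of-range inputs produce the 'integer indefinite' value INT_MIN, which is why the code first compares the result against 1 << (bits - 1) and only then distinguishes NaN, genuine overflow, and a legitimate INT_MIN. A scalar model of the instruction for f64 to i32:

fn cvttsd2si32(x: f64) -> i32 {
    let t = x.trunc(); // truncate toward zero
    if t.is_nan() || t < -2147483648.0 || t > 2147483647.0 {
        i32::MIN // the 'integer indefinite' result
    } else {
        t as i32
    }
}

fn main() {
    assert_eq!(cvttsd2si32(-3.7), -3);
    assert_eq!(cvttsd2si32(f64::NAN), i32::MIN);
    assert_eq!(cvttsd2si32(3.0e9), i32::MIN);
    // ...and i32::MIN is also the correct answer for this input, which is why
    // the expansion needs the extra comparisons against `flimit`.
    assert_eq!(cvttsd2si32(-2147483648.0), i32::MIN);
}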
- let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - // Finally, we could have a positive value that is too large. - let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - pos.ins().jump(done, &[]); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, maybe_trap_block); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_sint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_sint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done_block = func.dfg.make_block(); - let intmin_block = func.dfg.make_block(); - let minsat_block = func.dfg.make_block(); - let maxsat_block = func.dfg.make_block(); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done_block, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or - // overflow. It produces an INT_MIN result instead. - let cvtt2si = pos.ins().x86_cvtt2si(ty, x); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done_block, &[cvtt2si]); - pos.ins().jump(intmin_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. The input was NaN -> replace the result value with 0. - // 3. The input was out of range -> saturate the result to the min/max value. - pos.insert_block(intmin_block); - - // Check for NaN, which is truncated to 0. - let zero = pos.ins().iconst(ty, 0); - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins().brnz(is_nan, done_block, &[zero]); - pos.ins().jump(minsat_block, &[]); - - // Check for case 1: INT_MIN is the correct result. 
- // Determine the smallest floating point number that would convert to INT_MIN. - pos.insert_block(minsat_block); - let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - let min_imm = match ty { - ir::types::I32 => i32::min_value() as i64, - ir::types::I64 => i64::min_value(), - _ => panic!("Don't know the min value for {}", ty), - }; - let min_value = pos.ins().iconst(ty, min_imm); - pos.ins().brnz(overflow, done_block, &[min_value]); - pos.ins().jump(maxsat_block, &[]); - - // Finally, we could have a positive value that is too large. - pos.insert_block(maxsat_block); - let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - - let max_imm = match ty { - ir::types::I32 => i32::max_value() as i64, - ir::types::I64 => i64::max_value(), - _ => panic!("Don't know the max value for {}", ty), - }; - let max_value = pos.ins().iconst(ty, max_imm); - - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().brnz(overflow, done_block, &[max_value]); - - // Recycle the original instruction. - pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done_block); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, intmin_block); - cfg.recompute_block(pos.func, minsat_block); - cfg.recompute_block(pos.func, maxsat_block); - cfg.recompute_block(pos.func, done_block); -} - -/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes -/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat] -/// above (the scalar version), only due to how the transform groups are set up; TODO if we change -/// the SIMD legalization groups, then this logic could be merged into [expand_fcvt_to_sint_sat] -/// (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
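Both scalar expansions above funnel everything through `x86_cvtt2si` and then patch up the INT_MIN sentinel with explicit checks. A plain-Rust sketch of the resulting semantics for an f64-to-i32 conversion may make the branch structure clearer (illustration only; the names `Trap`, `fcvt_to_sint_i32` and `fcvt_to_sint_sat_i32` are made up, and Rust's saturating `as` cast stands in for the raw CVTTSD2SI in the in-range case):

    /// Plain-Rust model of the two expansions above for f64 -> i32.
    #[derive(Debug, PartialEq)]
    enum Trap {
        BadConversionToInteger, // NaN input
        IntegerOverflow,        // out-of-range input
    }

    /// `fcvt_to_sint.i32` semantics: trap on NaN or overflow.
    fn fcvt_to_sint_i32(x: f64) -> Result<i32, Trap> {
        if x.is_nan() {
            return Err(Trap::BadConversionToInteger);
        }
        // An f64 represents `i32::MIN as f64 - 1.0` exactly, so only values at
        // or below that bound overflow; this mirrors the `flimit` comparison
        // with FloatCC::LessThanOrEqual above.
        if x <= i32::MIN as f64 - 1.0 || x >= -(i32::MIN as f64) {
            return Err(Trap::IntegerOverflow);
        }
        Ok(x as i32) // in range: plain truncation
    }

    /// `fcvt_to_sint_sat.i32` semantics: NaN -> 0, out of range -> saturate.
    fn fcvt_to_sint_sat_i32(x: f64) -> i32 {
        if x.is_nan() {
            0
        } else if x <= i32::MIN as f64 - 1.0 {
            i32::MIN
        } else if x >= -(i32::MIN as f64) {
            i32::MAX
        } else {
            x as i32
        }
    }

    fn main() {
        // Slightly below -2^31 still truncates to INT_MIN, as the comments note.
        assert_eq!(fcvt_to_sint_i32(-2147483648.9), Ok(i32::MIN));
        assert_eq!(fcvt_to_sint_i32(f64::NAN), Err(Trap::BadConversionToInteger));
        assert_eq!(fcvt_to_sint_i32(2147483648.0), Err(Trap::IntegerOverflow));
        assert_eq!(fcvt_to_sint_sat_i32(f64::NAN), 0);
        assert_eq!(fcvt_to_sint_sat_i32(1e30), i32::MAX);
        assert_eq!(fcvt_to_sint_sat_i32(-1e30), i32::MIN);
    }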
-fn expand_fcvt_to_sint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest signed integer - // allowed in that lane. - - // Saturate NaNs: `fcmp eq` will not match if a lane contains a NaN. We use ANDPS to - // avoid doing the comparison twice (we need the zeroed lanes to find differences). - let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg); - let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans); - let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast); - - // Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or - // NaN, 0 otherwise). - let differences = pos.ins().bxor(zeroed_nans_bitcast, arg); - let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences); - - // Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set). - let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy); - - // Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB - // of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive - // overflow). - let tmp = pos.ins().band(differences_bitcast, converted); - let mask = pos.ins().sshr_imm(tmp, 31); - - // Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other - // cases) has no effect. - pos.func.dfg.replace(inst).bxor(converted, mask); - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUint, - arg, - } => arg, - _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). - let below_uint_max_block = func.dfg.make_block(); - - // block handle numbers < 0. - let below_zero_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. 
- let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_uint_max_block, &[]); - - // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the - // previous comparison. - pos.insert_block(below_uint_max_block); - pos.ins().trapff( - FloatCC::Unordered, - is_large, - ir::TrapCode::BadConversionToInteger, - ); - - // Now we know that x < 2^(N-1) and not NaN. - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(below_zero_block, &[]); - - pos.insert_block(below_zero_block); - pos.ins().trap(ir::TrapCode::IntegerOverflow); - - // Handle the case where x >= 2^(N-1) and not NaN. - pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_uint_max_block); - cfg.recompute_block(pos.func, below_zero_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_uint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_uint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). - let below_pow2nm1_or_nan_block = func.dfg.make_block(); - let below_pow2nm1_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // block handling numbers < 2^N. - let uint_large_block = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. 
- let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let zero = pos.ins().iconst(ty, 0); - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_pow2nm1_or_nan_block, &[]); - - // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. - pos.insert_block(below_pow2nm1_or_nan_block); - pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); - pos.ins().jump(below_pow2nm1_block, &[]); - - // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're - // done; otherwise saturate to the minimum unsigned value, that is 0. - pos.insert_block(below_pow2nm1_block); - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(done, &[zero]); - - // Handle the case where x >= 2^(N-1) and not NaN. - pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let max_value = pos.ins().iconst( - ty, - match ty { - ir::types::I32 => u32::max_value() as i64, - ir::types::I64 => u64::max_value() as i64, - _ => panic!("Can't convert {}", ty), - }, - ); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); - pos.ins().jump(uint_large_block, &[]); - - pos.insert_block(uint_large_block); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block); - cfg.recompute_block(pos.func, below_pow2nm1_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, uint_large_block); - cfg.recompute_block(pos.func, done); -} - -// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4. -static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [ - 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, -]; - -/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes -/// using a long sequence of NaN quieting and truncation. This logic is separate from -/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are -/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into -/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
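The unsigned conversions above lean on one trick: the hardware only provides a signed truncating conversion, so inputs at or above 2^(N-1) are converted as `x - 2^(N-1)` and the top bit is added back afterwards. A plain-Rust sketch of the saturating u32 case (illustrative names only; the trapping variant differs just in raising BadConversionToInteger / IntegerOverflow where this one saturates):

    /// Model of the scalar expansions above for f64 -> u32.
    fn fcvt_to_uint_sat_u32(x: f64) -> u32 {
        const POW2_31: f64 = 2147483648.0; // 2^(N-1) for N = 32
        if x.is_nan() || x <= -1.0 {
            // NaN and negative inputs saturate to the minimum, 0.
            0
        } else if x < POW2_31 {
            // Small enough for the signed converter directly.
            x as i32 as u32
        } else if x >= 2.0 * POW2_31 {
            // Too large even after the adjustment: saturate to u32::MAX.
            u32::MAX
        } else {
            // The "large" path: convert the adjusted value, then add 2^31 back,
            // which is what `iadd_imm lres, 1 << (ty.lane_bits() - 1)` does above.
            ((x - POW2_31) as i32 as u32).wrapping_add(1 << 31)
        }
    }

    fn main() {
        assert_eq!(fcvt_to_uint_sat_u32(f64::NAN), 0);
        assert_eq!(fcvt_to_uint_sat_u32(-3.7), 0);
        assert_eq!(fcvt_to_uint_sat_u32(123.9), 123);
        assert_eq!(fcvt_to_uint_sat_u32(3_000_000_000.5), 3_000_000_000);
        assert_eq!(fcvt_to_uint_sat_u32(1e20), u32::MAX);
    }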
-fn expand_fcvt_to_uint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest integer - // allowed in that lane. - let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into()); - let max_signed_constant = pos - .func - .dfg - .constants - .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into()); - let zeroes = pos.ins().vconst(F32X4, zeroes_constant); - let max_signed = pos.ins().vconst(F32X4, max_signed_constant); - // Clamp the input to 0 for negative floating point numbers. TODO we need to - // convert NaNs to 0 but this doesn't do that? - let ge_zero = pos.ins().x86_fmax(arg, zeroes); - // Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert. - // For floating point numbers above this, CVTTPS2DQ returns the undefined value - // 0x80000000. - let minus_max_signed = pos.ins().fsub(ge_zero, max_signed); - let le_max_signed = - pos.ins() - .fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed); - // Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0. - // These lanes have the MSB set to 1 after the XOR. We are trying to calculate a - // valid, in-range addend. - let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed); - let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed); - let difference = pos - .ins() - .bxor(minus_max_signed_as_int, le_max_signed_as_int); - // Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified - // previously (MSB set to 1). - let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes); - let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int); - // Convert the original clamped number to an integer and add back in the addend - // (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these). - let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero); - pos.func.dfg.replace(inst).iadd(converted, addend); - } else { - unreachable!( - "{} should not be legalized in expand_fcvt_to_uint_sat_vector", - pos.func.dfg.display_inst(inst, None) - ) - } - } -} - -/// Convert shuffle instructions. -fn convert_shuffle( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { - // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 - // in the most significant position zeroes the lane. - let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; - - // We only have to worry about aliasing here because copies will be introduced later (in - // regalloc). 
- let a = pos.func.dfg.resolve_aliases(args[0]); - let b = pos.func.dfg.resolve_aliases(args[1]); - let mask = pos - .func - .dfg - .immediates - .get(mask) - .expect("The shuffle immediate should have been recorded before this point") - .clone(); - if a == b { - // PSHUFB the first argument (since it is the same as the second). - let constructed_mask = mask - .iter() - // If the mask is greater than 15 it still may be referring to a lane in b. - .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the single incoming argument. - pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); - } else { - // PSHUFB the first argument, placing zeroes for unused lanes. - let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the first argument. - let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); - - // PSHUFB the second argument, placing zeroes for unused lanes. - let constructed_mask = mask - .iter() - .map(|b| b.wrapping_sub(16)) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let b_type = pos.func.dfg.value_type(b); - let mask_value = pos.ins().vconst(b_type, handle); - // Shuffle the second argument. - let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value); - - // OR the vectors together to form the final shuffled value. - pos.func - .dfg - .replace(inst) - .bor(shuffled_first_arg, shuffled_second_arg); - - // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB - }; - } -} - -/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF -/// extractlane instruction -fn convert_extractlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::BinaryImm8 { - opcode: ir::Opcode::Extractlane, - arg, - imm: lane, - } = pos.func.dfg[inst] - { - // NOTE: the following legalization assumes that the upper bits of the XMM register do - // not need to be zeroed during extractlane. - let value_type = pos.func.dfg.value_type(arg); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - let shuffled = if lane != 0 { - // Replace the extractlane with a PSHUFD to get the float in the right place. - match value_type { - F32X4 => { - // Move the selected lane to the 0 lane. - let shuffle_mask: u8 = 0b00_00_00_00 | lane; - pos.ins().x86_pshufd(arg, shuffle_mask) - } - F64X2 => { - assert_eq!(lane, 1); - // Because we know the lane == 1, we move the upper 64 bits to the lower - // 64 bits, leaving the top 64 bits as-is. - let shuffle_mask = 0b11_10_11_10; - let bitcast = pos.ins().raw_bitcast(F32X4, arg); - pos.ins().x86_pshufd(bitcast, shuffle_mask) - } - _ => unreachable!(), - } - } else { - // Remove the extractlane instruction, leaving the float where it is. 
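The two-mask PSHUFB technique used by `convert_shuffle` above is easy to model byte by byte: each input is shuffled with a mask whose out-of-range lanes are forced to zero (MSB set), and the two results are OR'd together. A small sketch of that semantics (illustration only; it assumes every shuffle index is in 0..=31, which the real lowering guarantees by zeroing unknown lanes):

    /// PSHUFB: for each output byte, an index 0-15 selects a byte of `v`,
    /// and a set MSB (0x80) zeroes the output byte instead.
    fn pshufb(v: [u8; 16], mask: [u8; 16]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in 0..16 {
            if mask[i] & 0x80 == 0 {
                out[i] = v[(mask[i] & 0x0F) as usize];
            }
        }
        out
    }

    /// `shuffle a, b, m` where m[i] in 0..=31 picks from the concatenation a ++ b:
    /// shuffle each input with its own mask (foreign lanes zeroed) and OR.
    fn shuffle(a: [u8; 16], b: [u8; 16], m: [u8; 16]) -> [u8; 16] {
        let mask_a = m.map(|i| if i > 15 { 0x80 } else { i });
        let mask_b = m.map(|i| if i > 15 { i - 16 } else { 0x80 });
        let sa = pshufb(a, mask_a);
        let sb = pshufb(b, mask_b);
        let mut out = [0u8; 16];
        for i in 0..16 {
            out[i] = sa[i] | sb[i]; // one side is always zero
        }
        out
    }

    fn main() {
        let a: [u8; 16] = core::array::from_fn(|i| i as u8);      // 0..=15
        let b: [u8; 16] = core::array::from_fn(|i| 16 + i as u8); // 16..=31
        // Interleave the first 8 lanes of a and b.
        let m = [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23];
        assert_eq!(shuffle(a, b, m), m);
    }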
- arg - }; - // Then we must bitcast to the right type. - pos.func - .dfg - .replace(inst) - .raw_bitcast(value_type.lane_type(), shuffled); - } else { - // For non-floats, lower with the usual PEXTR* instruction. - pos.func.dfg.replace(inst).x86_pextr(arg, lane); - } - } -} - -/// Because floats exist in XMM registers, we can keep them there when executing a CLIF -/// insertlane instruction -fn convert_insertlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::TernaryImm8 { - opcode: ir::Opcode::Insertlane, - args: [vector, replacement], - imm: lane, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(vector); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - match value_type { - F32X4 => { - assert!(lane <= 3); - let immediate = 0b00_00_00_00 | lane << 4; - // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane - // shifted into bits 5:6). - pos.func - .dfg - .replace(inst) - .x86_insertps(vector, replacement, immediate) - } - F64X2 => { - let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types - if lane == 0 { - // Move the lowest quadword in replacement to vector without changing - // the upper bits. - pos.func - .dfg - .replace(inst) - .x86_movsd(vector, replacement_as_vector) - } else { - assert_eq!(lane, 1); - // Move the low 64 bits of replacement vector to the high 64 bits of the - // vector. - pos.func - .dfg - .replace(inst) - .x86_movlhps(vector, replacement_as_vector) - } - } - _ => unreachable!(), - }; - } else { - // For non-floats, lower with the usual PINSR* instruction. - pos.func - .dfg - .replace(inst) - .x86_pinsr(vector, replacement, lane); - } - } -} - -/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`. 
-fn convert_ineg( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::Ineg, - arg, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(arg); - let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() { - let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into()); - pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR - } else if value_type.is_int() { - pos.ins().iconst(value_type, 0) - } else { - panic!("Can't convert ineg of type {}", value_type) - }; - pos.func.dfg.replace(inst).isub(zero_value, arg); - } else { - unreachable!() - } -} - -fn expand_dword_to_xmm<'f>( - pos: &mut FuncCursor<'_>, - arg: ir::Value, - arg_type: ir::Type, -) -> ir::Value { - if arg_type == I64 { - let (arg_lo, arg_hi) = pos.ins().isplit(arg); - let arg = pos.ins().scalar_to_vector(I32X4, arg_lo); - let arg = pos.ins().insertlane(arg, arg_hi, 1); - let arg = pos.ins().raw_bitcast(I64X2, arg); - arg - } else { - pos.ins().bitcast(I64X2, arg) - } -} - -fn contract_dword_from_xmm<'f>( - pos: &mut FuncCursor<'f>, - inst: ir::Inst, - ret: ir::Value, - ret_type: ir::Type, -) { - if ret_type == I64 { - let ret = pos.ins().raw_bitcast(I32X4, ret); - let ret_lo = pos.ins().extractlane(ret, 0); - let ret_hi = pos.ins().extractlane(ret, 1); - pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi); - } else { - let ret = pos.ins().extractlane(ret, 0); - pos.func.dfg.replace(inst).ireduce(ret_type, ret); - } -} - -// Masks for i8x16 unsigned right shift. -static USHR_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -]; - -// Convert a vector unsigned right shift. x86 has implementations for i16x8 and up (see `x86_pslr`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the upper bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. 
-fn convert_ushr( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ushr, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psrl(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect upper bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psrl(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. - unreachable!() - } - } -} - -// Masks for i8x16 left shift. 
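The USHR_MASKS table above (and the SHL_MASKS table that follows) implements the byte-shift workaround described in the comments: shift pairs of bytes as 16-bit lanes, then AND with `0xFF >> s` (or `0xFF << s`) to clear the bits that leaked across a byte boundary. A plain-Rust sketch of the unsigned-right-shift case (a model only; the real code loads the mask row from the constant pool by indexing with `shift * 16`):

    /// i8x16 unsigned right shift built from a 16-bit shift plus a mask,
    /// mirroring the PSRLW + PAND sequence emitted above.
    fn ushr_i8x16(v: [u8; 16], shift: u32) -> [u8; 16] {
        let s = shift % 8;
        let mask = 0xFFu8 >> s; // one row of the 128-byte USHR_MASKS table
        let mut out = [0u8; 16];
        for i in (0..16).step_by(2) {
            // Shift a pair of bytes as one little-endian u16 (the PSRLW step)...
            let lane = u16::from_le_bytes([v[i], v[i + 1]]) >> s;
            let bytes = lane.to_le_bytes();
            // ...then clear the bits that crossed a byte boundary (the PAND step).
            out[i] = bytes[0] & mask;
            out[i + 1] = bytes[1] & mask;
        }
        out
    }

    fn main() {
        let v: [u8; 16] = core::array::from_fn(|i| (i as u8) * 17); // 0x00, 0x11, ...
        let expected: [u8; 16] = core::array::from_fn(|i| ((i as u8) * 17) >> 3);
        assert_eq!(ushr_i8x16(v, 3), expected);
    }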
-static SHL_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, - 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, - 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, - 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, - 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -]; - -// Convert a vector left shift. x86 has implementations for i16x8 and up (see `x86_psll`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the lower bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. -fn convert_ishl( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ishl, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psll(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect lower bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psll(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. 
- let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psll(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. - unreachable!() - } - } -} - -/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2. -fn convert_i64x2_imul( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Imul, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - let ty = pos.func.dfg.ctrl_typevar(inst); - if ty == I64X2 { - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. - pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1); - } else { - // Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each - // 64-bit lane into 32-bit high and low sections using shifting and then performs - // the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 = - // concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1). - let high0 = pos.ins().ushr_imm(arg0, 32); - let mul0 = pos.ins().x86_pmuludq(high0, arg1); - let high1 = pos.ins().ushr_imm(arg1, 32); - let mul1 = pos.ins().x86_pmuludq(high1, arg0); - let addhigh = pos.ins().iadd(mul0, mul1); - let high = pos.ins().ishl_imm(addhigh, 32); - let low = pos.ins().x86_pmuludq(arg0, arg1); - pos.func.dfg.replace(inst).iadd(low, high); - } - } else { - unreachable!( - "{} should be encodable; it cannot be legalized by convert_i64x2_imul", - pos.func.dfg.display_inst(inst, None) - ); - } - } -} - -fn expand_tls_value( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - use crate::settings::TlsModel; - - assert!( - isa.triple().architecture == target_lexicon::Architecture::X86_64, - "Not yet implemented for {:?}", - isa.triple(), - ); - - if let ir::InstructionData::UnaryGlobalValue { - opcode: ir::Opcode::TlsValue, - global_value, - } = func.dfg[inst] - { - let ctrl_typevar = func.dfg.ctrl_typevar(inst); - assert_eq!(ctrl_typevar, ir::types::I64); - - match isa.flags().tls_model() { - TlsModel::None => panic!("tls_model flag is not set."), - TlsModel::ElfGd => { - func.dfg.replace(inst).x86_elf_tls_get_addr(global_value); - } - TlsModel::Macho => { - func.dfg.replace(inst).x86_macho_tls_get_addr(global_value); - } - model => unimplemented!("tls_value for tls model {:?}", model), - } - } else { - unreachable!(); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/mod.rs b/cranelift/codegen/src/isa/legacy/x86/mod.rs deleted file mode 100644 index e61fda1931..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/mod.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! x86 Instruction Set Architectures. 
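The SSE2 fallback in `convert_i64x2_imul` above reduces a 64x64-bit low multiply to three PMULUDQ multiplies of 32-bit halves. The per-lane arithmetic can be checked in plain Rust (illustration only; the helper name is made up):

    /// Model of the per-lane arithmetic in the SSE2 lowering above: PMULUDQ
    /// multiplies the low 32 bits of each 64-bit lane, so a full 64-bit low
    /// product is assembled from three 32x32 -> 64 multiplies.
    fn mul64_via_32bit_halves(a: u64, b: u64) -> u64 {
        let (lo_a, hi_a) = (a & 0xFFFF_FFFF, a >> 32);
        let (lo_b, hi_b) = (b & 0xFFFF_FFFF, b >> 32);
        // The two x86_pmuludq(ushr_imm(x, 32), y) steps; only the low 32 bits
        // of the sum matter once it is shifted left by 32.
        let cross = hi_a.wrapping_mul(lo_b).wrapping_add(hi_b.wrapping_mul(lo_a));
        let low = lo_a.wrapping_mul(lo_b); // x86_pmuludq(arg0, arg1)
        low.wrapping_add(cross << 32)      // iadd(low, ishl_imm(addhigh, 32))
    }

    fn main() {
        for &(a, b) in &[
            (0x0123_4567_89AB_CDEFu64, 0xFEDC_BA98_7654_3210u64),
            (u64::MAX, 3),
            (1 << 63, 2),
        ] {
            assert_eq!(mul64_via_32bit_halves(a, b), a.wrapping_mul(b));
        }
    }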
- -mod abi; -mod binemit; -mod enc_tables; -mod registers; -pub mod settings; -#[cfg(feature = "unwind")] -pub mod unwind; - -use super::super::settings as shared_settings; -#[cfg(feature = "testing_hooks")] -use crate::binemit::CodeSink; -use crate::binemit::{emit_function, MemoryCodeSink}; -use crate::ir; -use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; -use crate::isa::Builder as IsaBuilder; -#[cfg(feature = "unwind")] -use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit}; -use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; -use crate::regalloc; -use crate::result::CodegenResult; -use crate::timing; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; -use core::fmt; -use core::hash::{Hash, Hasher}; -use target_lexicon::{PointerWidth, Triple}; - -#[allow(dead_code)] -struct Isa { - triple: Triple, - shared_flags: shared_settings::Flags, - isa_flags: settings::Flags, - cpumode: &'static [shared_enc_tables::Level1Entry], -} - -/// Get an ISA builder for creating x86 targets. -pub fn isa_builder(triple: Triple) -> IsaBuilder { - IsaBuilder { - triple, - setup: settings::builder(), - constructor: isa_constructor, - } -} - -fn isa_constructor( - triple: Triple, - shared_flags: shared_settings::Flags, - builder: shared_settings::Builder, -) -> Box { - let level1 = match triple.pointer_width().unwrap() { - PointerWidth::U16 => unimplemented!("x86-16"), - PointerWidth::U32 => &enc_tables::LEVEL1_I32[..], - PointerWidth::U64 => &enc_tables::LEVEL1_I64[..], - }; - - let isa_flags = settings::Flags::new(&shared_flags, builder); - - Box::new(Isa { - triple, - isa_flags, - shared_flags, - cpumode: level1, - }) -} - -impl TargetIsa for Isa { - fn name(&self) -> &'static str { - "x86" - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.shared_flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.shared_flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - - fn uses_cpu_flags(&self) -> bool { - true - } - - fn uses_complex_addresses(&self) -> bool { - true - } - - fn register_info(&self) -> RegInfo { - registers::INFO.clone() - } - - #[cfg(feature = "unwind")] - fn map_dwarf_register(&self, reg: RegUnit) -> Result { - unwind::systemv::map_reg(self, reg).map(|r| r.0) - } - - fn encoding_info(&self) -> EncInfo { - enc_tables::INFO.clone() - } - - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - lookup_enclist( - ctrl_typevar, - inst, - func, - self.cpumode, - &enc_tables::LEVEL2[..], - &enc_tables::ENCLISTS[..], - &enc_tables::LEGALIZE_ACTIONS[..], - &enc_tables::RECIPE_PREDICATES[..], - &enc_tables::INST_PREDICATES[..], - self.isa_flags.predicate_view(), - ) - } - - fn legalize_signature(&self, sig: &mut Cow, current: bool) { - abi::legalize_signature( - sig, - &self.triple, - current, - &self.shared_flags, - &self.isa_flags, - ) - } - - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { - abi::regclass_for_abi_type(ty) - } - - fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet { - abi::allocatable_registers(&self.triple, &self.shared_flags) - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn CodeSink, - ) { - 
binemit::emit_inst(func, inst, divert, sink, self) - } - - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink, self) - } - - fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { - let _tt = timing::prologue_epilogue(); - abi::prologue_epilogue(func, self) - } - - fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - #[cfg(feature = "unwind")] - fn create_unwind_info( - &self, - func: &ir::Function, - ) -> CodegenResult> { - abi::create_unwind_info(func, self) - } - - #[cfg(feature = "unwind")] - fn create_systemv_cie(&self) -> Option { - Some(unwind::systemv::create_cie()) - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } -} - -impl fmt::Display for Isa { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}", self.shared_flags, self.isa_flags) - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/registers.rs b/cranelift/codegen/src/isa/legacy/x86/registers.rs deleted file mode 100644 index a7518b268b..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/registers.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! x86 register descriptions. - -use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; - -include!(concat!(env!("OUT_DIR"), "/registers-x86.rs")); - -#[cfg(test)] -mod tests { - use super::*; - use crate::isa::RegUnit; - use alloc::string::{String, ToString}; - - #[test] - fn unit_encodings() { - fn gpr(unit: usize) -> Option { - Some(GPR.unit(unit)) - } - // The encoding of integer registers is not alphabetical. - assert_eq!(INFO.parse_regunit("rax"), gpr(0)); - assert_eq!(INFO.parse_regunit("rbx"), gpr(3)); - assert_eq!(INFO.parse_regunit("rcx"), gpr(1)); - assert_eq!(INFO.parse_regunit("rdx"), gpr(2)); - assert_eq!(INFO.parse_regunit("rsi"), gpr(6)); - assert_eq!(INFO.parse_regunit("rdi"), gpr(7)); - assert_eq!(INFO.parse_regunit("rbp"), gpr(5)); - assert_eq!(INFO.parse_regunit("rsp"), gpr(4)); - assert_eq!(INFO.parse_regunit("r8"), gpr(8)); - assert_eq!(INFO.parse_regunit("r15"), gpr(15)); - - fn fpr(unit: usize) -> Option { - Some(FPR.unit(unit)) - } - assert_eq!(INFO.parse_regunit("xmm0"), fpr(0)); - assert_eq!(INFO.parse_regunit("xmm15"), fpr(15)); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. - // fn fpr32(unit: usize) -> Option { - // Some(FPR32.unit(unit)) - // } - // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0)); - // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31)); - } - - #[test] - fn unit_names() { - fn gpr(ru: RegUnit) -> String { - INFO.display_regunit(GPR.first + ru).to_string() - } - assert_eq!(gpr(0), "%rax"); - assert_eq!(gpr(3), "%rbx"); - assert_eq!(gpr(1), "%rcx"); - assert_eq!(gpr(2), "%rdx"); - assert_eq!(gpr(6), "%rsi"); - assert_eq!(gpr(7), "%rdi"); - assert_eq!(gpr(5), "%rbp"); - assert_eq!(gpr(4), "%rsp"); - assert_eq!(gpr(8), "%r8"); - assert_eq!(gpr(15), "%r15"); - - fn fpr(ru: RegUnit) -> String { - INFO.display_regunit(FPR.first + ru).to_string() - } - assert_eq!(fpr(0), "%xmm0"); - assert_eq!(fpr(15), "%xmm15"); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. 
- // fn fpr32(ru: RegUnit) -> String { - // INFO.display_regunit(FPR32.first + ru).to_string() - // } - // assert_eq!(fpr32(0), "%xmm0"); - // assert_eq!(fpr32(31), "%xmm31"); - } - - #[test] - fn regclasses() { - assert_eq!(GPR.intersect_index(GPR), Some(GPR.into())); - assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(GPR.intersect_index(FPR), None); - assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(FPR), None); - assert_eq!(FPR.intersect_index(FPR), Some(FPR.into())); - assert_eq!(FPR.intersect_index(GPR), None); - assert_eq!(FPR.intersect_index(ABCD), None); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/settings.rs b/cranelift/codegen/src/isa/legacy/x86/settings.rs deleted file mode 100644 index f13431c1a2..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/settings.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! x86 Settings. - -use crate::settings::{self, detail, Builder, Value}; -use core::fmt; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a -// public `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta/src/isa/x86/settings.rs`. -include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); - -#[cfg(test)] -mod tests { - use super::{builder, Flags}; - use crate::settings::{self, Configurable}; - - #[test] - fn presets() { - let shared = settings::Flags::new(settings::builder()); - - // Nehalem has SSE4.1 but not BMI1. - let mut b0 = builder(); - b0.enable("nehalem").unwrap(); - let f0 = Flags::new(&shared, b0); - assert_eq!(f0.has_sse41(), true); - assert_eq!(f0.has_bmi1(), false); - - let mut b1 = builder(); - b1.enable("haswell").unwrap(); - let f1 = Flags::new(&shared, b1); - assert_eq!(f1.has_sse41(), true); - assert_eq!(f1.has_bmi1(), true); - } - #[test] - fn display_presets() { - // Spot check that the flags Display impl does not cause a panic - let shared = settings::Flags::new(settings::builder()); - - let b0 = builder(); - let f0 = Flags::new(&shared, b0); - let _ = format!("{}", f0); - - let mut b1 = builder(); - b1.enable("nehalem").unwrap(); - let f1 = Flags::new(&shared, b1); - let _ = format!("{}", f1); - - let mut b2 = builder(); - b2.enable("haswell").unwrap(); - let f2 = Flags::new(&shared, b2); - let _ = format!("{}", f2); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind.rs b/cranelift/codegen/src/isa/legacy/x86/unwind.rs deleted file mode 100644 index 2eed8b74e4..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind.rs +++ /dev/null @@ -1,531 +0,0 @@ -//! Module for x86 unwind generation for supported ABIs. - -pub mod systemv; -pub mod winx64; - -use crate::ir::{Function, InstructionData, Opcode, ValueLoc}; -use crate::isa::x86::registers::{FPR, RU}; -use crate::isa::{RegUnit, TargetIsa}; -use crate::result::CodegenResult; -use alloc::vec::Vec; -use std::collections::HashMap; - -use crate::isa::unwind::input::{UnwindCode, UnwindInfo}; - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult>> { - // Find last block based on max offset. - let last_block = func - .layout - .blocks() - .max_by_key(|b| func.offsets[*b]) - .expect("at least a block"); - // Find last instruction offset + size, and make it function size. 
- let function_size = func - .inst_offsets(last_block, &isa.encoding_info()) - .fold(0, |_, (offset, _, size)| offset + size); - - let entry_block = func.layout.entry_block().expect("missing entry block"); - let prologue_end = func.prologue_end.unwrap(); - let epilogues_start = func - .epilogues_start - .iter() - .map(|(i, b)| (*b, *i)) - .collect::>(); - - let word_size = isa.pointer_bytes(); - - let mut stack_size = None; - let mut prologue_size = 0; - let mut prologue_unwind_codes = Vec::new(); - let mut epilogues_unwind_codes = Vec::new(); - let mut frame_register: Option = None; - - // Process only entry block and blocks with epilogues. - let mut blocks = func - .epilogues_start - .iter() - .map(|(_, b)| *b) - .collect::>(); - if !blocks.contains(&entry_block) { - blocks.push(entry_block); - } - blocks.sort_by_key(|b| func.offsets[*b]); - - for block in blocks.iter() { - let mut in_prologue = block == &entry_block; - let mut in_epilogue = false; - let mut epilogue_pop_offsets = Vec::new(); - - let epilogue_start = epilogues_start.get(block); - let is_last_block = block == &last_block; - - for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) { - let offset = offset + size; - - let unwind_codes; - if in_prologue { - // Check for prologue end (inclusive) - if prologue_end == inst { - in_prologue = false; - } - prologue_size += size; - unwind_codes = &mut prologue_unwind_codes; - } else if !in_epilogue && epilogue_start == Some(&inst) { - // Now in an epilogue, emit a remember state instruction if not last block - in_epilogue = true; - - epilogues_unwind_codes.push(Vec::new()); - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RememberState)); - } - } else if in_epilogue { - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - } else { - // Ignore normal instructions - continue; - } - - match func.dfg[inst] { - InstructionData::Unary { opcode, arg } => { - match opcode { - Opcode::X86Push => { - let reg = func.locations[arg].unwrap_reg(); - unwind_codes.push(( - offset, - UnwindCode::StackAlloc { - size: word_size.into(), - }, - )); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg, - stack_offset: 0, - }, - )); - } - Opcode::AdjustSpDown => { - let stack_size = - stack_size.expect("expected a previous stack size instruction"); - - // This is used when calling a stack check function - // We need to track the assignment to RAX which has the size of the stack - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: stack_size })); - } - _ => {} - } - } - InstructionData::UnaryImm { opcode, imm } => { - match opcode { - Opcode::Iconst => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - assert!(stack_size.is_none()); - - // This instruction should only appear in a prologue to pass an - // argument of the stack size to a stack check function. - // Record the stack size so we know what it is when we encounter the adjustment - // instruction (which will adjust via the register assigned to this instruction). 
- stack_size = Some(imm as u32); - } - Opcode::AdjustSpDownImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: imm as u32 })); - } - Opcode::AdjustSpUpImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackDealloc { size: imm as u32 })); - } - _ => {} - } - } - InstructionData::Store { - opcode: Opcode::Store, - args: [arg1, arg2], - offset: stack_offset, - .. - } => { - if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) = - (func.locations[arg1], func.locations[arg2]) - { - // If this is a save of an FPR, record an unwind operation - // Note: the stack_offset here is relative to an adjusted SP - if dst == (RU::rsp as RegUnit) && FPR.contains(src) { - let stack_offset: i32 = stack_offset.into(); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg: src, - stack_offset: stack_offset as u32, - }, - )); - } - } - } - InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => { - // Check for change in CFA register (RSP is always the starting CFA) - if src == (RU::rsp as RegUnit) { - unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst })); - frame_register = Some(dst); - } - } - InstructionData::NullAry { opcode } => match opcode { - Opcode::X86Pop => { - epilogue_pop_offsets.push(offset); - } - _ => {} - }, - InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode { - Opcode::Return => { - let args = func.dfg.inst_args(inst); - for (i, arg) in args.iter().rev().enumerate() { - // Only walk back the args for the pop instructions encountered - if i >= epilogue_pop_offsets.len() { - break; - } - - let offset = epilogue_pop_offsets[i]; - - let reg = func.locations[*arg].unwrap_reg(); - unwind_codes.push((offset, UnwindCode::RestoreRegister { reg })); - unwind_codes.push(( - offset, - UnwindCode::StackDealloc { - size: word_size.into(), - }, - )); - - if Some(reg) == frame_register { - unwind_codes.push((offset, UnwindCode::RestoreFramePointer)); - // Keep frame_register assigned for next epilogue. - } - } - epilogue_pop_offsets.clear(); - - // TODO ensure unwind codes sorted by offsets ? 
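The prologue/epilogue scan above turns machine instructions into a list of (offset, UnwindCode) pairs; replaying those codes is what later recovers the stack-pointer adjustment at any point in the function. A simplified stand-in sketch (the enum below mirrors only a few variants of the real `crate::isa::unwind::input::UnwindCode`, and the register names are plain strings for illustration):

    #[derive(Clone, Copy)]
    enum Code {
        StackAlloc { size: u32 },
        StackDealloc { size: u32 },
        SaveRegister { reg: &'static str, stack_offset: u32 },
        SetFramePointer { reg: &'static str },
    }

    /// Replay the codes and report the SP adjustment after each one.
    fn replay(codes: &[(u32, Code)]) {
        let mut sp_offset = 0u32; // bytes allocated below the entry SP
        for &(offset, code) in codes {
            match code {
                Code::StackAlloc { size } => sp_offset += size,
                Code::StackDealloc { size } => sp_offset -= size,
                Code::SaveRegister { reg, stack_offset } => {
                    println!("  +{offset}: {reg} saved {stack_offset} bytes above SP")
                }
                Code::SetFramePointer { reg } => println!("  +{offset}: CFA tracked via {reg}"),
            }
            println!("  +{offset}: SP is {sp_offset} bytes below entry SP");
        }
    }

    fn main() {
        // The codes a `push rbp; mov rbp, rsp; sub rsp, 64` prologue yields,
        // matching the `test_small_alloc` expectation below.
        let prologue = [
            (2, Code::StackAlloc { size: 8 }),                       // push rbp
            (2, Code::SaveRegister { reg: "rbp", stack_offset: 0 }),
            (5, Code::SetFramePointer { reg: "rbp" }),               // mov rbp, rsp
            (9, Code::StackAlloc { size: 64 }),                      // sub rsp, 64
        ];
        replay(&prologue);
    }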
- - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RestoreState)); - } - - in_epilogue = false; - } - _ => {} - }, - _ => {} - }; - } - } - - Ok(Some(UnwindInfo { - prologue_size, - prologue_unwind_codes, - epilogues_unwind_codes, - function_size, - word_size, - initial_sp_offset: word_size, - })) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ - types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind, - }; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_small_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 9, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (9, UnwindCode::StackAlloc { size: 64 }) - ], - epilogues_unwind_codes: vec![vec![ - (13, UnwindCode::StackDealloc { size: 64 }), - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ]], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 10000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, UnwindCode::StackDealloc { size: 10000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind 
info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 1000000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, UnwindCode::StackDealloc { size: 1000000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 5, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into() - } - ) - ], - epilogues_unwind_codes: vec![ - vec![ - (12, UnwindCode::RememberState), - ( - 12, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (12, UnwindCode::StackDealloc { size: 8 }), - (12, UnwindCode::RestoreFramePointer), - (13, UnwindCode::RestoreState) - ], - vec![ - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ] - ], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brnz(v0, block2, &[]); - pos.ins().jump(block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs deleted file mode 100644 index 31fc64c9fb..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Unwind information for System V ABI (x86-64). 
-
-use crate::ir::Function;
-use crate::isa::{
-    unwind::systemv::{RegisterMappingError, UnwindInfo},
-    RegUnit, TargetIsa,
-};
-use crate::result::CodegenResult;
-use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
-
-/// Creates a new x86-64 common information entry (CIE).
-pub fn create_cie() -> CommonInformationEntry {
-    use gimli::write::CallFrameInstruction;
-
-    let mut entry = CommonInformationEntry::new(
-        Encoding {
-            address_size: 8,
-            format: Format::Dwarf32,
-            version: 1,
-        },
-        1,  // Code alignment factor
-        -8, // Data alignment factor
-        X86_64::RA,
-    );
-
-    // Every frame will start with the call frame address (CFA) at RSP+8
-    // It is +8 to account for the push of the return address by the call instruction
-    entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
-
-    // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
-    entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
-
-    entry
-}
-
-/// Map Cranelift registers to their corresponding Gimli registers.
-pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result<Register, RegisterMappingError> {
-    if isa.name() != "x86" || isa.pointer_bits() != 64 {
-        return Err(RegisterMappingError::UnsupportedArchitecture);
-    }
-
-    // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
-    const X86_GP_REG_MAP: [gimli::Register; 16] = [
-        X86_64::RAX,
-        X86_64::RCX,
-        X86_64::RDX,
-        X86_64::RBX,
-        X86_64::RSP,
-        X86_64::RBP,
-        X86_64::RSI,
-        X86_64::RDI,
-        X86_64::R8,
-        X86_64::R9,
-        X86_64::R10,
-        X86_64::R11,
-        X86_64::R12,
-        X86_64::R13,
-        X86_64::R14,
-        X86_64::R15,
-    ];
-    const X86_XMM_REG_MAP: [gimli::Register; 16] = [
-        X86_64::XMM0,
-        X86_64::XMM1,
-        X86_64::XMM2,
-        X86_64::XMM3,
-        X86_64::XMM4,
-        X86_64::XMM5,
-        X86_64::XMM6,
-        X86_64::XMM7,
-        X86_64::XMM8,
-        X86_64::XMM9,
-        X86_64::XMM10,
-        X86_64::XMM11,
-        X86_64::XMM12,
-        X86_64::XMM13,
-        X86_64::XMM14,
-        X86_64::XMM15,
-    ];
-
-    let reg_info = isa.register_info();
-    let bank = reg_info
-        .bank_containing_regunit(reg)
-        .ok_or_else(|| RegisterMappingError::MissingBank)?;
-    match bank.name {
-        "IntRegs" => {
-            // x86 GP registers have a weird mapping to DWARF registers, so we use a
-            // lookup table.
-            Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize])
-        }
-        "FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]),
-        _ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)),
-    }
-}
-
-pub(crate) fn create_unwind_info(
-    func: &Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<UnwindInfo>> {
-    // Only System V-like calling conventions are supported
-    match isa.unwind_info_kind() {
-        crate::machinst::UnwindInfoKind::SystemV => {}
-        _ => return Ok(None),
-    }
-
-    if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 {
-        return Ok(None);
-    }
-
-    let unwind = match super::create_unwind_info(func, isa)? {
-        Some(u) => u,
-        None => {
-            return Ok(None);
-        }
-    };
-
-    struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b));
-    impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper for RegisterMapper<'a, 'b> {
-        fn map(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
-            Ok(map_reg(self.0, reg)?.0)
-        }
-        fn sp(&self) -> u16 {
-            X86_64::RSP.0
-        }
-        fn fp(&self) -> Option<u16> {
-            Some(X86_64::RBP.0)
-        }
-    }
-    let map = RegisterMapper(isa);
-
-    Ok(Some(UnwindInfo::build(unwind, &map)?))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::cursor::{Cursor, FuncCursor};
-    use crate::ir::{
-        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
-    };
-    use crate::isa::{lookup_variant, BackendVariant, CallConv};
-    use crate::settings::{builder, Flags};
-    use crate::Context;
-    use gimli::write::Address;
-    use std::str::FromStr;
-    use target_lexicon::triple;
-
-    #[test]
-    fn test_simple_func() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::SystemV,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let fde = match isa
-            .create_unwind_info(&context.func)
-            .expect("can create unwind info")
-        {
-            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
-                info.to_fde(Address::Constant(1234))
-            }
-            _ => panic!("expected unwind information"),
-        };
-
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
-    }
-
-    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
-        let mut func =
-            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
-
-        let block0 = func.dfg.make_block();
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().return_(&[]);
-
-        if let Some(stack_slot) = stack_slot {
-            func.stack_slots.push(stack_slot);
-        }
-
-        func
-    }
-
-    #[test]
-    fn test_multi_return_func() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let fde = match isa
-            .create_unwind_info(&context.func)
-            .expect("can create unwind info")
-        {
-            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
-                info.to_fde(Address::Constant(4321))
-            }
-            _ => panic!("expected unwind information"),
-        };
-
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
-    }
-
-    fn create_multi_return_function(call_conv: CallConv) -> Function {
-        let mut sig = Signature::new(call_conv);
-        sig.params.push(AbiParam::new(types::I32));
-        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
-
-        let block0 = func.dfg.make_block();
-        let v0 = func.dfg.append_block_param(block0, types::I32);
-        let block1 = func.dfg.make_block();
-        let block2 = func.dfg.make_block();
-
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().brnz(v0, block2, &[]);
-        pos.ins().jump(block1, &[]);
-
-        pos.insert_block(block1);
-        pos.ins().return_(&[]);
-
-        pos.insert_block(block2);
-        pos.ins().return_(&[]);
-
-        func
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs
deleted file mode 100644
index 33e5463bb8..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs
+++ /dev/null
@@ -1,265 +0,0 @@
-//! Unwind information for Windows x64 ABI.
-
-use crate::ir::Function;
-use crate::isa::x86::registers::{FPR, GPR};
-use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa};
-use crate::result::CodegenResult;
-
-pub(crate) fn create_unwind_info(
-    func: &Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<UnwindInfo>> {
-    // Only Windows fastcall is supported for unwind information
-    if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() {
-        return Ok(None);
-    }
-
-    let unwind = match super::create_unwind_info(func, isa)? {
-        Some(u) => u,
-        None => {
-            return Ok(None);
-        }
-    };
-
-    Ok(Some(UnwindInfo::build::<RegisterMapper>(unwind)?))
-}
-
-struct RegisterMapper;
-
-impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper {
-    fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister {
-        use crate::isa::unwind::winx64::MappedRegister;
-        if GPR.contains(reg) {
-            MappedRegister::Int(GPR.index_of(reg) as u8)
-        } else if FPR.contains(reg) {
-            MappedRegister::Xmm(reg as u8)
-        } else {
-            panic!()
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::cursor::{Cursor, FuncCursor};
-    use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
-    use crate::isa::unwind::winx64::UnwindCode;
-    use crate::isa::x86::registers::RU;
-    use crate::isa::{lookup_variant, BackendVariant, CallConv};
-    use crate::settings::{builder, Flags};
-    use crate::Context;
-    use std::str::FromStr;
-    use target_lexicon::triple;
-
-    #[test]
-    fn test_wrong_calling_convention() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(CallConv::SystemV, None));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        assert_eq!(
-            create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
-            None
-        );
-    }
-
-    #[test]
-    fn test_small_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                flags: 0,
-                prologue_size: 9,
-                frame_register: None,
-                frame_register_offset: 0,
-                unwind_codes: vec![
-                    UnwindCode::PushRegister {
-                        instruction_offset: 2,
-                        reg: GPR.index_of(RU::rbp.into()) as u8
-                    },
-                    UnwindCode::StackAlloc {
-                        instruction_offset: 9,
-                        size: 64
-                    }
-                ]
-            }
-        );
-
-        assert_eq!(unwind.emit_size(), 8);
-
-        let mut buf = [0u8; 8];
-        unwind.emit(&mut buf);
-
-        assert_eq!(
-            buf,
-            [
-                0x01, // Version and flags (version 1, no flags)
-                0x09, // Prologue size
-                0x02, // Unwind code
count (1 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x09, // Prolog offset - 0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) - ] - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 10000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x03, // Unwind code count (2 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0) - 0xE2, // Low size byte - 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - 0x00, // Padding - 0x00, // Padding - ] - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 1000000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x04, // Unwind code count (3 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1) - 0x40, // Byte 1 of size - 0x42, // Byte 2 of size - 0x0F, // Byte 3 of size - 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - ] - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = 
func.dfg.make_block();
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().return_(&[]);
-
-        if let Some(stack_slot) = stack_slot {
-            func.stack_slots.push(stack_slot);
-        }
-
-        func
-    }
-}
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index 3331534c49..92edfd744f 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -84,12 +84,9 @@ pub(crate) mod aarch64;
 #[cfg(feature = "s390x")]
 mod s390x;
 
-#[cfg(any(feature = "x86", feature = "riscv"))]
+#[cfg(feature = "riscv")]
 mod legacy;
 
-#[cfg(feature = "x86")]
-use legacy::x86;
-
 #[cfg(feature = "riscv")]
 use legacy::riscv;
 
@@ -120,49 +117,19 @@ macro_rules! isa_builder {
     }};
 }
 
-/// The "variant" for a given target. On one platform (x86-64), we have two
-/// backends, the "old" and "new" one; the new one is the default if included
-/// in the build configuration and not otherwise specified.
-#[derive(Clone, Copy, Debug)]
-pub enum BackendVariant {
-    /// Any backend available.
-    Any,
-    /// A "legacy" backend: one that operates using legalizations and encodings.
-    Legacy,
-    /// A backend built on `MachInst`s and the `VCode` framework.
-    MachInst,
-}
-
-impl Default for BackendVariant {
-    fn default() -> Self {
-        BackendVariant::Any
-    }
-}
-
 /// Look for an ISA for the given `triple`, selecting the backend variant given
 /// by `variant` if available.
-pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
-    match (triple.architecture, variant) {
-        (Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => {
+pub fn lookup_variant(triple: Triple) -> Result<Builder, LookupError> {
+    match triple.architecture {
+        Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
             isa_builder!(riscv, (feature = "riscv"), triple)
         }
-        (Architecture::X86_64, BackendVariant::Legacy) => {
-            isa_builder!(x86, (feature = "x86"), triple)
-        }
-        (Architecture::X86_64, BackendVariant::MachInst) => {
+        Architecture::X86_64 => {
            isa_builder!(x64, (feature = "x86"), triple)
         }
-        #[cfg(not(feature = "old-x86-backend"))]
-        (Architecture::X86_64, BackendVariant::Any) => {
-            isa_builder!(x64, (feature = "x86"), triple)
-        }
-        #[cfg(feature = "old-x86-backend")]
-        (Architecture::X86_64, BackendVariant::Any) => {
-            isa_builder!(x86, (feature = "x86"), triple)
-        }
-        (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
-        (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
-        (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
+        Architecture::Arm { .. } => isa_builder!(arm32, (feature = "arm32"), triple),
+        Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
+        Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
         _ => Err(LookupError::Unsupported),
     }
 }
@@ -170,7 +137,7 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError>
 pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
-    lookup_variant(triple, BackendVariant::Any)
+    lookup_variant(triple)
 }
 
 /// Look for a supported ISA with the given `name`.
@@ -292,11 +259,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     /// Get the ISA-dependent flag values that were used to make this trait object.
     fn isa_flags(&self) -> Vec<settings::Value>;
 
-    /// Get the variant of this ISA (Legacy or MachInst).
-    fn variant(&self) -> BackendVariant {
-        BackendVariant::Legacy
-    }
-
     /// Hashes all flags, both ISA-independent and ISA-specific, into the
     /// specified hasher.
fn hash_all_flags(&self, hasher: &mut dyn Hasher); diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index cd0db1ae34..c100e36031 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2962,45 +2962,6 @@ fn lower_insn_to_regs>( | Opcode::IfcmpImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } } Ok(()) diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 9115db0671..7119d5b260 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -109,7 +109,6 @@ mod tests { use target_lexicon::triple; #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_simple_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") @@ -152,7 +151,6 @@ mod tests { } #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_multi_return_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index da29a04314..9b51a27b07 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -6900,44 +6900,6 @@ fn lower_insn_to_regs>( panic!("Branch opcode reached non-branch lowering logic!"); } - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - Opcode::Nop => { // Nothing. 
} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 3b794a1e25..ee89ee35f0 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -21,9 +21,9 @@ use crate::ir::types::{I32, I64}; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; -#[cfg(any(feature = "x86", feature = "riscv"))] +#[cfg(feature = "riscv")] use crate::predicates; -#[cfg(any(feature = "x86", feature = "riscv"))] +#[cfg(feature = "riscv")] use alloc::vec::Vec; use crate::timing; diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index c0bc76417f..b60bf8300a 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -3,7 +3,7 @@ use crate::binemit; use crate::ir; use crate::isa::{ - BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, + EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, }; use crate::machinst::*; use crate::regalloc::RegisterSet; @@ -64,10 +64,6 @@ impl TargetIsa for TargetIsaAdapter { self.backend.isa_flags() } - fn variant(&self) -> BackendVariant { - BackendVariant::MachInst - } - fn hash_all_flags(&self, hasher: &mut dyn Hasher) { self.backend.hash_all_flags(hasher); } diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index f5789b67ee..902df05a08 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %amode_add(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index cbd265a9ea..64049860dd 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(b1, i32, i32) -> i32 { ; check: pushq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 8b43d70c7c..88b605d3d7 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 99aec088ac..edcf36ec1a 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 8f619e2aa7..e2f0d8e81a 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; system_v has first param in %rdi, fascall in %rcx function %one_arg(i32) system_v { diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index f36caed88a..627e4700bb 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ 
b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_lzcnt +target x86_64 has_lzcnt function %clz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 16c788ed84..853fd0d5f9 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index 5931451e11..42f82c653f 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_bmi1 +target x86_64 has_bmi1 function %ctz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 3984aba42f..5a366914a2 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -1,6 +1,6 @@ test compile set avoid_div_traps=false -target x86_64 machinst +target x86_64 ;; We should get the checked-div/rem sequence (`srem` pseudoinst below) even ;; when `avoid_div_traps` above is false (i.e. even when the host is normally diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index fc8d3a801d..99397044d9 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -1,7 +1,7 @@ test compile set enable_llvm_abi_extensions=true set unwind_info=true -target x86_64 machinst +target x86_64 function %f0(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): @@ -206,7 +206,7 @@ block0(v0: i64): v18 = load.f64 v0+136 v19 = load.f64 v0+144 v20 = load.f64 v0+152 - + v21 = fadd.f64 v1, v2 v22 = fadd.f64 v3, v4 v23 = fadd.f64 v5, v6 diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index c1e30a3b19..2f2552aec1 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(f64) -> f64 { block0(v0: f64): diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 2c77bc7ec2..b8c27f422e 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i64 vmctx) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 61783e366d..75013a8170 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,6 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i128, i128) -> i128 { ; check: pushq %rbp @@ -190,7 +190,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %rax, %r8 ; nextln: andq $$1, %r8 ; nextln: setnz %r8b - + v4 = icmp slt v0, v1 ; check: cmpq 
%rcx, %rsi ; nextln: setl %r9b @@ -201,7 +201,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r9, %r10 ; nextln: andq $$1, %r10 ; nextln: setnz %r9b - + v5 = icmp sle v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setl %r10b @@ -212,7 +212,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r10, %r11 ; nextln: andq $$1, %r11 ; nextln: setnz %r10b - + v6 = icmp sgt v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setnle %r11b @@ -307,7 +307,7 @@ block0(v0: i128): ; nextln: setz %sil ; nextln: andb %dil, %sil ; nextln: jnz label1; j label2 - + jump block2 block1: @@ -725,7 +725,7 @@ block2(v6: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret - + } function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -1106,4 +1106,4 @@ block0(v0: i128, v1: i128): ; nextln: movq %rcx, %rdx ; nextln: movq %rbp, %rsp ; nextln: popq %rbp -; nextln: ret \ No newline at end of file +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index a4069b20ca..85c26dec3e 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 5b23afb8d3..533eb5341b 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %move_registers(i32x4) -> b8x16 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index 2049f53962..8326e681b0 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_popcnt has_sse42 +target x86_64 has_popcnt has_sse42 function %popcnt(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index df68f6b4b7..e3f8d0c49d 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %popcnt64(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 8e8b424c70..40944a797c 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -1,6 +1,6 @@ test compile set enable_probestack=true -target x86_64 machinst +target x86_64 function %f1() -> i64 { ss0 = explicit_slot 100000 diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index af6996f85f..41c8a67fb1 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -1,6 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i32, i128, i128) -> i128 { ; check: pushq %rbp @@ -24,6 +24,6 @@ block0(v0: i32, v1: i128, v2: i128): ; nextln: movq %rbp, %rsp ; 
nextln: popq %rbp ; nextln: ret - + } diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 52761b1ed0..b7251f9fe1 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bitselect_i16x8() -> i16x8 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index b50ff6328d..7e3dee77f0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index 2f6a8c7dfd..38894f6086 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst has_ssse3 has_sse41 +target x86_64 has_ssse3 has_sse41 ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 72249faaef..29f4b2cdb0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bnot_b32x4(b32x4) -> b32x4 { block0(v0: b32x4): diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif index 31edd7bdca..c20f816fc2 100644 --- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif +++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; The goal of this test is to ensure that stack spills of an integer value, ;; which width is less than the machine word's size, cause the full word to be diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index e9001c5393..23fbb731b8 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function u0:0(i64 sarg(64)) -> i8 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index ee59ff4963..90a6d6fbe4 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64 sret) { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 37a4698619..71bf7dada9 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ 
b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -1,6 +1,6 @@ test compile set tls_model=elf_gd -target x86_64 machinst +target x86_64 function u0:0(i32) -> i64 { gv0 = symbol colocated tls u1:0 diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index ef43c3dd03..32c856c419 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %elide_uextend_add(i32, i32) -> i64 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 5ddd4b20d3..6548930328 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; From: https://github.com/bytecodealliance/wasmtime/issues/2670 diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif deleted file mode 100644 index bfea325055..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abcd.clif +++ /dev/null @@ -1,13 +0,0 @@ -test regalloc -target i686 legacy - -; %rdi can't be used in a movsbl instruction, so test that the register -; allocator can move it to a register that can be. - -function %test(i32 [%rdi]) -> i32 system_v { -block0(v0: i32 [%rdi]): - v1 = ireduce.i8 v0 - v2 = sextend.i32 v1 - return v2 -} -; check: regmove v1, %rdi -> %rax diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif deleted file mode 100644 index 5286de3c18..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi-bool.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy haswell - -function %foo(i64, i64, i64, i32) -> b1 system_v { -block3(v0: i64, v1: i64, v2: i64, v3: i32): - v5 = icmp ne v2, v2 - v8 = iconst.i64 0 - jump block2(v8, v3, v5) - -block2(v10: i64, v30: i32, v37: b1): - v18 = load.i32 notrap aligned v2 - v27 = iadd.i64 v10, v10 - v31 = icmp eq v30, v30 - brz v31, block2(v27, v30, v37) - jump block0(v37) - -block0(v35: b1): - return v35 -} diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif deleted file mode 100644 index 8ca530a695..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi32.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of function signatures. -test legalizer -target i686 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v - -block0: - return -} - diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif deleted file mode 100644 index 0da2aad424..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi64.clif +++ /dev/null @@ -1,37 +0,0 @@ -; Test the legalization of function signatures. 
-test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v - - sig3 = () -> i128 system_v - ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v - - sig4 = (i128) -> i128 system_v - ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v - -block0: - return -} - -function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v { - sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v - fn0 = u0:0 sig0 - -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64): - call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif deleted file mode 100644 index ca0ace1342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif +++ /dev/null @@ -1,25 +0,0 @@ -; binary emission of 32-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs32.clif | llvm-mc -show-encoding -triple=i386 -; - -; Tests from binary32.clif affected by emit_all_ones_funcaddrs. -function %I32() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movl $-1, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) ffffffff - ; asm: movl $-1, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) ffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif deleted file mode 100644 index 7fbb670df2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif +++ /dev/null @@ -1,27 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests from binary64.clif affected by emit_all_ones_funcaddrs. 
-function %I64() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movabsq $-1, %rcx - [-,%rcx] v400 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %rsi - [-,%rsi] v401 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %r10 - [-,%r10] v402 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) ffffffffffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif deleted file mode 100644 index d2713829cd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set enable_probestack=false -target i686 legacy - -function u0:0(i32 vmctx) baldrdash_system_v { - sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v - -block0(v0: i32): - v2 = iconst.i32 0 - v8 = iconst.i32 0 - v9 = iconst.i32 0 - call_indirect sig0, v8(v9, v2) - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif deleted file mode 100644 index 9099315878..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif +++ /dev/null @@ -1,92 +0,0 @@ -test compile -target x86_64 legacy baseline - - -; clz/ctz on 64 bit operands - -function %i64_clz(i64) -> i64 { -block0(v10: i64): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i64 - return v11 -} - -function %i64_ctz(i64) -> i64 { -block1(v20: i64): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i64 - return v21 -} - - -; clz/ctz on 32 bit operands - -function %i32_clz(i32) -> i32 { -block0(v10: i32): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i32 - return v11 -} - -function %i32_ctz(i32) -> i32 { -block1(v20: i32): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i32 - return v21 -} - - -; popcount on 64 bit operands - -function %i64_popcount(i64) -> i64 { -block0(v30: i64): - v31 = popcnt v30; - ; check: ushr_imm - ; check: iconst.i64 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i64 - ; check: band - ; check: iconst.i64 - ; check: imul - ; check: ushr_imm - return v31; -} - - -; popcount on 32 bit operands - -function %i32_popcount(i32) -> i32 { -block0(v40: i32): - v41 = popcnt v40; - ; check: ushr_imm - ; check: iconst.i32 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i32 - ; check: band - ; check: iconst.i32 - ; check: imul - ; check: ushr_imm - return v41; -} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif deleted file mode 100644 index b2f36ff148..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif +++ /dev/null @@ -1,87 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy baseline - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %Foo() { -block0: - ; 64-bit wide bsf - - [-,%r11] v10 = iconst.i64 0x1234 - ; asm: bsfq %r11, %rcx - 
[-,%rcx,%rflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb - - [-,%rdx] v14 = iconst.i64 0x5678 - ; asm: bsfq %rdx, %r12 - [-,%r12,%rflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2 - - ; asm: bsfq %rdx, %rdi - [-,%rdi,%rflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa - - - ; 32-bit wide bsf - - [-,%r11] v20 = iconst.i32 0x1234 - ; asm: bsfl %r11d, %ecx - [-,%rcx,%rflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb - - [-,%rdx] v24 = iconst.i32 0x5678 - ; asm: bsfl %edx, %r12d - [-,%r12,%rflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2 - - ; asm: bsfl %edx, %esi - [-,%rsi,%rflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2 - - - ; 64-bit wide bsr - - [-,%r11] v30 = iconst.i64 0x1234 - ; asm: bsrq %r11, %rcx - [-,%rcx,%rflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb - - [-,%rdx] v34 = iconst.i64 0x5678 - ; asm: bsrq %rdx, %r12 - [-,%r12,%rflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2 - - ; asm: bsrq %rdx, %rdi - [-,%rdi,%rflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa - - - ; 32-bit wide bsr - - [-,%r11] v40 = iconst.i32 0x1234 - ; asm: bsrl %r11d, %ecx - [-,%rcx,%rflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb - - [-,%rdx] v44 = iconst.i32 0x5678 - ; asm: bsrl %edx, %r12d - [-,%r12,%rflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2 - - ; asm: bsrl %edx, %esi - [-,%rsi,%rflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2 - - - ; 64-bit wide cmov - - ; asm: cmoveq %r11, %rdx - [-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3 - - ; asm: cmoveq %rdi, %rdx - [-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7 - - - ; 32-bit wide cmov - - ; asm: cmovnel %r11d, %edx - [-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3 - - ; asm: cmovlel %esi, %edx - [-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6 - - - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif deleted file mode 100644 index cfac85f7b2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif +++ /dev/null @@ -1,557 +0,0 @@ -; Binary emission of 32-bit floating point code. 
-test binemit -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32-float.clif | llvm-mc -show-encoding -triple=i386 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2ss %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 0f 2a e9 - ; asm: cvtsi2ss %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f32 v1 ; bin: f3 0f 2a d6 - - ; asm: cvtss2sd %xmm2, %xmm5 - [-,%xmm5] v12 = fpromote.f64 v11 ; bin: f3 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm2 - [-,%xmm2] v13 = fpromote.f64 v10 ; bin: f3 0f 5a d5 - - ; asm: movd %ecx, %xmm5 - [-,%xmm5] v14 = bitcast.f32 v0 ; bin: 66 0f 6e e9 - ; asm: movd %esi, %xmm2 - [-,%xmm2] v15 = bitcast.f32 v1 ; bin: 66 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v16 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm2, %esi - [-,%rsi] v17 = bitcast.i32 v11 ; bin: 66 0f 7e d6 - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. - - ; asm: addss %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 0f 58 ea - ; asm: addss %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f3 0f 58 d5 - - ; asm: subss %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 0f 5c ea - ; asm: subss %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f3 0f 5c d5 - - ; asm: mulss %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 0f 59 ea - ; asm: mulss %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f3 0f 59 d5 - - ; asm: divss %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 0f 5e ea - ; asm: divss %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2 - - ; Min/max. - - ; asm: minss %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f3 0f 5d ea - ; asm: minss %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f3 0f 5d d5 - ; asm: maxss %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f3 0f 5f ea - ; asm: maxss %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f3 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtss %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f3 0f 51 d5 - ; asm: sqrtss %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0a e5 00 - ; asm: roundss $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0a e5 01 - ; asm: roundss $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0a e5 02 - ; asm: roundss $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0a e5 03 - ; asm: roundss $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 - - ; Load/Store - - ; asm: movss (%ecx), %xmm5 - [-,%xmm5] v100 = load.f32 v0 ; bin: heap_oob f3 0f 10 29 - ; asm: movss (%esi), %xmm2 - [-,%xmm2] v101 = load.f32 v1 ; bin: heap_oob f3 0f 10 16 - ; asm: movss 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f32 v0+50 ; bin: heap_oob f3 0f 10 69 32 - ; asm: movss -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f32 v1-50 ; bin: heap_oob f3 0f 10 56 ce - ; asm: movss 10000(%ecx), %xmm5 - [-,%xmm5] v120 = load.f32 v0+10000 ; bin: heap_oob f3 0f 10 a9 00002710 - ; asm: movss -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f32 v1-10000 ; bin: heap_oob f3 0f 10 96 ffffd8f0 - - ; asm: movss %xmm5, (%ecx) - [-] store.f32 v100, v0 ; bin: heap_oob f3 0f 11 29 - ; asm: movss %xmm2, (%esi) - [-] store.f32 v101, v1 ; bin: heap_oob f3 0f 11 16 - ; asm: movss %xmm5, 50(%ecx) - [-] store.f32 v100, v0+50 ; bin: heap_oob f3 0f 11 69 32 - ; asm: movss %xmm2, -50(%esi) - [-] store.f32 v101, v1-50 ; bin: heap_oob f3 0f 11 56 ce - ; asm: movss %xmm5, 10000(%ecx) - [-] store.f32 v100, v0+10000 ; bin: heap_oob f3 0f 11 a9 00002710 - ; asm: movss %xmm2, -10000(%esi) - [-] store.f32 v101, v1-10000 ; bin: heap_oob f3 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movss %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 0f 11 94 24 00000408 - - ; asm: movss 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f3 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 0f 2e ea - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i32 1 - [-,%rbx] v351 = iconst.i32 2 - - ; asm: movss (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce - ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: movss -0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0 - ; asm: movss %xmm5,(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movss %xmm5,0x32(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movss %xmm2,-0x32(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce - ; asm: movss %xmm5,0x2710(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movss %xmm2,-0x2710(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2sd %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 0f 2a e9 - ; asm: cvtsi2sd %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f64 v1 ; bin: f2 0f 2a d6 - - ; asm: cvtsd2ss %xmm2, %xmm5 - [-,%xmm5] v12 = fdemote.f32 v11 ; bin: f2 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm2 - [-,%xmm2] v13 = fdemote.f32 v10 ; bin: f2 0f 5a d5 - - ; No i64 <-> f64 bitcasts in 32-bit mode. - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. 
- - ; asm: addsd %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 0f 58 ea - ; asm: addsd %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f2 0f 58 d5 - - ; asm: subsd %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 0f 5c ea - ; asm: subsd %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f2 0f 5c d5 - - ; asm: mulsd %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 0f 59 ea - ; asm: mulsd %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f2 0f 59 d5 - - ; asm: divsd %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 0f 5e ea - ; asm: divsd %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f2 0f 5d ea - ; asm: minsd %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f2 0f 5d d5 - ; asm: maxsd %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f2 0f 5f ea - ; asm: maxsd %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f2 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f2 0f 51 d5 - ; asm: sqrtsd %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0b e5 00 - ; asm: roundsd $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0b e5 01 - ; asm: roundsd $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0b e5 02 - ; asm: roundsd $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0b e5 03 - ; asm: roundsd $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%ecx), %xmm5 - [-,%xmm5] v100 = load.f64 v0 ; bin: heap_oob f2 0f 10 29 - ; asm: movsd (%esi), %xmm2 - [-,%xmm2] v101 = load.f64 v1 ; bin: heap_oob f2 0f 10 16 - ; asm: movsd 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f64 v0+50 ; bin: heap_oob f2 0f 10 69 32 - ; asm: movsd -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f64 v1-50 ; bin: heap_oob f2 0f 10 56 ce - ; asm: movsd 10000(%ecx), %xmm5 - [-,%xmm5] v120 = load.f64 v0+10000 ; bin: heap_oob f2 0f 10 a9 00002710 - ; asm: movsd -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f64 v1-10000 ; bin: heap_oob f2 0f 10 96 ffffd8f0 - - ; asm: movsd %xmm5, (%ecx) - [-] store.f64 v100, v0 ; bin: heap_oob f2 0f 11 29 - ; asm: movsd %xmm2, (%esi) - [-] store.f64 v101, v1 ; bin: heap_oob f2 0f 11 16 - ; asm: movsd %xmm5, 50(%ecx) - [-] store.f64 v100, v0+50 ; bin: heap_oob f2 0f 11 69 32 - ; asm: movsd %xmm2, -50(%esi) - [-] store.f64 v101, v1-50 ; bin: heap_oob f2 0f 11 56 ce - ; asm: movsd %xmm5, 10000(%ecx) - [-] store.f64 v100, v0+10000 ; bin: heap_oob f2 0f 11 a9 00002710 - ; asm: movsd %xmm2, -10000(%esi) - [-] store.f64 v101, v1-10000 ; bin: heap_oob f2 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 0f 11 94 24 00000408 - - ; asm: movsd 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f2 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 0f 2e ea - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 - trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - ; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %al - [-,%rax] v14 = trueff gt v1 ; bin: 0f 97 c0 - ; asm: setae %al - [-,%rax] v15 = trueff ge v1 ; bin: 0f 93 c0 - ; asm: setb %cl - [-,%rcx] v16 = trueff ult v1 ; bin: 0f 92 c1 - ; asm: setbe %cl - [-,%rcx] v17 = trueff ule v1 ; bin: 0f 96 c1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif deleted file mode 100644 index 11268d5c4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32.clif +++ /dev/null @@ -1,721 +0,0 @@ -; binary emission of x86-32 code. 
-test binemit -set opt_level=speed_and_size -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32.clif | llvm-mc -show-encoding -triple=i386 -; - -function %I32() { - sig0 = () - fn0 = %foo() - - gv0 = symbol %some_gv - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - ; asm: movl $1, %ecx - [-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001 - ; asm: movl $2, %esi - [-,%rsi] v2 = iconst.i32 2 ; bin: be 00000002 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - - ; Integer Register-Register Operations. - - ; asm: addl %esi, %ecx - [-,%rcx] v10 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %ecx, %esi - [-,%rsi] v11 = iadd v2, v1 ; bin: 01 ce - ; asm: subl %esi, %ecx - [-,%rcx] v12 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %ecx, %esi - [-,%rsi] v13 = isub v2, v1 ; bin: 29 ce - - ; asm: andl %esi, %ecx - [-,%rcx] v14 = band v1, v2 ; bin: 21 f1 - ; asm: andl %ecx, %esi - [-,%rsi] v15 = band v2, v1 ; bin: 21 ce - ; asm: orl %esi, %ecx - [-,%rcx] v16 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %ecx, %esi - [-,%rsi] v17 = bor v2, v1 ; bin: 09 ce - ; asm: xorl %esi, %ecx - [-,%rcx] v18 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %ecx, %esi - [-,%rsi] v19 = bxor v2, v1 ; bin: 31 ce - - ; Dynamic shifts take the shift amount in %rcx. - - ; asm: shll %cl, %esi - [-,%rsi] v20 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %ecx - [-,%rcx] v21 = ishl v1, v1 ; bin: d3 e1 - ; asm: shrl %cl, %esi - [-,%rsi] v22 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %ecx - [-,%rcx] v23 = ushr v1, v1 ; bin: d3 e9 - ; asm: sarl %cl, %esi - [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %ecx - [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9 - ; asm: roll %cl, %esi - [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %ecx - [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1 - ; asm: rorl %cl, %esi - [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %ecx - [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9 - - ; Integer Register - Immediate 8-bit operations. - ; The 8-bit immediate is sign-extended. - - ; asm: addl $-128, %ecx - [-,%rcx] v30 = iadd_imm v1, -128 ; bin: 83 c1 80 - ; asm: addl $10, %esi - [-,%rsi] v31 = iadd_imm v2, 10 ; bin: 83 c6 0a - - ; asm: andl $-128, %ecx - [-,%rcx] v32 = band_imm v1, -128 ; bin: 83 e1 80 - ; asm: andl $10, %esi - [-,%rsi] v33 = band_imm v2, 10 ; bin: 83 e6 0a - ; asm: orl $-128, %ecx - [-,%rcx] v34 = bor_imm v1, -128 ; bin: 83 c9 80 - ; asm: orl $10, %esi - [-,%rsi] v35 = bor_imm v2, 10 ; bin: 83 ce 0a - ; asm: xorl $-128, %ecx - [-,%rcx] v36 = bxor_imm v1, -128 ; bin: 83 f1 80 - ; asm: xorl $10, %esi - [-,%rsi] v37 = bxor_imm v2, 10 ; bin: 83 f6 0a - - ; Integer Register - Immediate 32-bit operations. 
- - ; asm: addl $-128000, %ecx - [-,%rcx] v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00 - ; asm: addl $1000000, %esi - [-,%rsi] v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240 - - ; asm: andl $-128000, %ecx - [-,%rcx] v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00 - ; asm: andl $1000000, %esi - [-,%rsi] v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240 - ; asm: orl $-128000, %ecx - [-,%rcx] v44 = bor_imm v1, -128000 ; bin: 81 c9 fffe0c00 - ; asm: orl $1000000, %esi - [-,%rsi] v45 = bor_imm v2, 1000000 ; bin: 81 ce 000f4240 - ; asm: xorl $-128000, %ecx - [-,%rcx] v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00 - ; asm: xorl $1000000, %esi - [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v50 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %ecx, %esi - [-,%rsi] v51 = imul v2, v1 ; bin: 0f af f1 - - ; asm: movl $1, %eax - [-,%rax] v52 = iconst.i32 1 ; bin: b8 00000001 - ; asm: movl $2, %edx - [-,%rdx] v53 = iconst.i32 2 ; bin: ba 00000002 - ; asm: idivl %ecx - [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2 ; bin: int_divz f7 fe - ; asm: divl %ecx - [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2 ; bin: int_divz f7 f6 - - ; Register copies. - - ; asm: movl %esi, %ecx - [-,%rcx] v80 = copy v2 ; bin: 89 f1 - ; asm: movl %ecx, %esi - [-,%rsi] v81 = copy v1 ; bin: 89 ce - - ; Copy Special - ; asm: movl %esp, %ebp - copy_special %rsp -> %rbp ; bin: 89 e5 - ; asm: movl %ebp, %esp - copy_special %rbp -> %rsp ; bin: 89 ec - - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. - - ; asm: movl %ecx, (%esi) - store v1, v2 ; bin: heap_oob 89 0e - ; asm: movl %esi, (%ecx) - store v2, v1 ; bin: heap_oob 89 31 - ; asm: movw %cx, (%esi) - istore16 v1, v2 ; bin: heap_oob 66 89 0e - ; asm: movw %si, (%ecx) - istore16 v2, v1 ; bin: heap_oob 66 89 31 - ; asm: movb %cl, (%esi) - istore8 v1, v2 ; bin: heap_oob 88 0e - ; Can't store %sil in 32-bit mode (needs REX prefix). - - ; asm: movl (%ecx), %edi - [-,%rdi] v100 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%esi), %edx - [-,%rdx] v101 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%ecx), %edi - [-,%rdi] v102 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%esi), %edx - [-,%rdx] v103 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%ecx), %edi - [-,%rdi] v104 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%esi), %edx - [-,%rdx] v105 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%ecx), %edi - [-,%rdi] v106 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%esi), %edx - [-,%rdx] v107 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%ecx), %edi - [-,%rdi] v108 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%esi), %edx - [-,%rdx] v109 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movl %ecx, 100(%esi) - store v1, v2+100 ; bin: heap_oob 89 4e 64 - ; asm: movl %esi, -100(%ecx) - store v2, v1-100 ; bin: heap_oob 89 71 9c - ; asm: movw %cx, 100(%esi) - istore16 v1, v2+100 ; bin: heap_oob 66 89 4e 64 - ; asm: movw %si, -100(%ecx) - istore16 v2, v1-100 ; bin: heap_oob 66 89 71 9c - ; asm: movb %cl, 100(%esi) - istore8 v1, v2+100 ; bin: heap_oob 88 4e 64 - - ; asm: movl 50(%ecx), %edi - [-,%rdi] v110 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%esi), %edx - [-,%rdx] v111 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%ecx), %edi - [-,%rdi] v112 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%esi), %edx - [-,%rdx] v113 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%ecx), %edi - [-,%rdi] v114 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%esi), %edx - [-,%rdx] v115 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%ecx), %edi - [-,%rdi] v116 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%esi), %edx - [-,%rdx] v117 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%ecx), %edi - [-,%rdi] v118 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%esi), %edx - [-,%rdx] v119 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. - - ; asm: movl %ecx, 10000(%esi) - store v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%ecx) - store v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%esi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%ecx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%esi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - - ; asm: movl 50000(%ecx), %edi - [-,%rdi] v120 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%esi), %edx - [-,%rdx] v121 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%ecx), %edi - [-,%rdi] v122 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%esi), %edx - [-,%rdx] v123 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%ecx), %edi - [-,%rdi] v124 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%esi), %edx - [-,%rdx] v125 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%ecx), %edi - [-,%rdi] v126 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%esi), %edx - [-,%rdx] v127 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%ecx), %edi - [-,%rdi] v128 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%esi), %edx - [-,%rdx] v129 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Bit-counting instructions. - - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %ecx, %esi - [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v202 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %ecx, %esi - [-,%rsi] v203 = clz v1 ; bin: f3 0f bd f1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v204 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %ecx, %esi - [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 - - ; Integer comparisons. 
- - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %ecx, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v1 ; bin: 39 ce 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %ecx, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v1 ; bin: 39 ce 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %ecx, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v1 ; bin: 39 ce 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %ecx, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v1 ; bin: 39 ce 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %ecx, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v1 ; bin: 39 ce 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %ecx, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v1 ; bin: 39 ce 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %ecx, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v1 ; bin: 39 ce 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %ecx, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v1 ; bin: 39 ce 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %ecx, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v1 ; bin: 39 ce 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %ecx, %esi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v1 ; bin: 39 ce 0f 96 c2 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; asm: call foo - call fn0() ; bin: stk_ovf e8 CallPCRel4(%foo-4) 00000000 - - ; asm: movl $0, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) 00000000 - - ; asm: call *%ecx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%esi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - - ; asm: movl $0, %ecx - [-,%rcx] v450 = symbol_value.i32 gv0 ; bin: b9 Abs4(%some_gv) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v451 = symbol_value.i32 gv0 ; bin: be Abs4(%some_gv) 00000000 - - ; Spill / Fill. 
- - ; asm: movl %ecx, 1032(%esp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%esp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - - ; asm: movl 1032(%esp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%esp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - - ; asm: movl %ecx, 1032(%esp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%esp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushl %ecx - x86_push v1 ; bin: stk_ovf 51 - ; asm: popl %ecx - [-,%rcx] v512 = x86_pop.i32 ; bin: 59 - - ; Adjust Stack Pointer Up - ; asm: addl $64, %esp - adjust_sp_up_imm 64 ; bin: 83 c4 40 - ; asm: addl $-64, %esp - adjust_sp_up_imm -64 ; bin: 83 c4 c0 - ; asm: addl $1024, %esp - adjust_sp_up_imm 1024 ; bin: 81 c4 00000400 - ; asm: addl $-1024, %esp - adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subl %ecx, %esp - adjust_sp_down v1 ; bin: 29 cc - ; asm: subl %esi, %esp - adjust_sp_down v2 ; bin: 29 f4 - ; asm: addl $64, %esp - adjust_sp_down_imm 64 ; bin: 83 ec 40 - ; asm: addl $-64, %esp - adjust_sp_down_imm -64 ; bin: 83 ec c0 - ; asm: addl $1024, %esp - adjust_sp_down_imm 1024 ; bin: 81 ec 00000400 - ; asm: addl $-1024, %esp - adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000 - - ; Shift immediates - ; asm: shll $2, %esi - [-,%rsi] v513 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: sarl $5, %esi - [-,%rsi] v514 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: shrl $8, %esi - [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08 - - ; Rotate immediates - ; asm: rolq $12, %esi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: c1 c6 0c - ; asm: rorq $5, %esi - [-,%rsi] v5103 = rotr_imm v2, 5 ; bin: c1 ce 05 - - ; Load Complex - [-,%rax] v521 = iconst.i32 1 - [-,%rbx] v522 = iconst.i32 1 - ; asm: movl (%eax,%ebx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movl 1(%eax,%ebx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%eax,%ebx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbl (%eax,%ebx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbl (%eax,%ebx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwl (%eax,%ebx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswl (%eax,%ebx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - - ; Store Complex - [-,%rcx] v601 = iconst.i32 1 - ; asm: mov %ecx,(%eax,%ebx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%eax,%ebx,1) - store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%eax,%ebx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %cx,(%eax,%ebx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cl,(%eax,%ebx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; Carry 
Addition - ; asm: addl %esi, %ecx - [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2 ; bin: 01 f1 - ; asm: adcl %esi, %ecx - [-,%rcx] v703 = iadd_ifcin v1, v2, v702 ; bin: 11 f1 - ; asm: adcl %esi, %ecx - [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1 - - ; Borrow Subtraction - ; asm: subl %esi, %ecx - [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2 ; bin: 29 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx] v708 = isub_ifbin v1, v2, v707 ; bin: 19 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1 - - ; asm: testl %ecx, %ecx - ; asm: je block1 - brz v1, block1 ; bin: 85 c9 74 0e - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1 - brz v2, block1 ; bin: 85 f6 74 0a - fallthrough block4 - -block4: - ; asm: testl %ecx, %ecx - ; asm: jne block1 - brnz v1, block1 ; bin: 85 c9 75 06 - fallthrough block5 - -block5: - ; asm: testl %esi, %esi - ; asm: jne block1 - brnz v2, block1 ; bin: 85 f6 75 02 - - ; asm: jmp block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - ; asm: ret - return ; bin: c3 - - ; asm: block2: -block2: - trap user0 ; bin: user0 0f 0b -} - -; Special branch encodings only for I32 mode. -function %special_branches() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%rdi] v3 = icmp eq v1, v2 - [-,%rbx] v4 = icmp ugt v1, v2 - - ; asm: testl $0xff, %edi - ; asm: je block1 - brz v3, block1 ; bin: f7 c7 000000ff 0f 84 00000015 - fallthrough block2 - -block2: - ; asm: testb %bl, %bl - ; asm: je block1 - brz v4, block1 ; bin: 84 db 74 11 - fallthrough block3 - -block3: - ; asm: testl $0xff, %edi - ; asm: jne block1 - brnz v3, block1 ; bin: f7 c7 000000ff 0f 85 00000005 - fallthrough block4 - -block4: - ; asm: testb %bl, %bl - ; asm: jne block1 - brnz v4, block1 ; bin: 84 db 75 01 - fallthrough block5 - -block5: - return - -block1: - return -} - -; CPU flag instructions. 
-function %cpu_flags() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - jump block1 - -block1: - ; asm: cmpl %esi, %ecx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %ecx, %esi - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 39 ce - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 fa - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f8 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f6 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f4 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f2 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e f0 - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ee - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ec - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 ea - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e8 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl - [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %bl - [-,%rbx] v24 = trueif sgt v11 ; bin: 0f 9f c3 - ; asm: setle %bl - [-,%rbx] v25 = trueif sle v11 ; bin: 0f 9e c3 - ; asm: setb %dl - [-,%rdx] v26 = trueif ult v11 ; bin: 0f 92 c2 - ; asm: setae %dl - [-,%rdx] v27 = trueif uge v11 ; bin: 0f 93 c2 - ; asm: seta %bl - [-,%rbx] v28 = trueif ugt v11 ; bin: 0f 97 c3 - ; asm: setbe %bl - [-,%rbx] v29 = trueif ule v11 ; bin: 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. - ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Stack check. - ; asm: cmpl %esp, %ecx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 39 e1 - ; asm: cmpl %esp, %esi - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 39 e6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v42 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %esi - [-,%rflags] v43 = ifcmp_imm v2, 100 ; bin: 83 fe 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %esi - [-,%rflags] v45 = ifcmp_imm v2, 10000 ; bin: 81 fe 00002710 - - return -} - -; Tests for i32/i8 conversion instructions. 
-function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. -function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - - trap user0 ; bin: user0 0f 0b -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif deleted file mode 100644 index 6bf6f325b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ /dev/null @@ -1,638 +0,0 @@ -; Binary emission of 64-bit floating point code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-float.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2ssl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb - ; asm: cvtsi2ssl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6 - - ; asm: cvtsi2ssq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8 - ; asm: cvtsi2ssq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6 - - ; asm: cvtss2sd %xmm10, %xmm5 - [-,%xmm5] v14 = fpromote.f64 v11 ; bin: f3 41 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm10 - [-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5 - - ; asm: movd %r11d, %xmm5 - [-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb - ; asm: movd %esi, %xmm10 - [-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm10, %esi - [-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addss %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea - ; asm: addss %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5 - - ; asm: subss %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea - ; asm: subss %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5 - - ; asm: mulss %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea - ; asm: mulss %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5 - - ; asm: divss %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea - ; asm: divss %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. 
- - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Copy to SSA - - ; asm: movsd %xmm0, %xmm15 - [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 - ; asm: movsd %xmm15, %xmm0 - [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 - ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 - ; asm: movsd %xmm11, %xmm14 - [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 - - ; asm: movss %xmm0, %xmm15 - [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 - ; asm: movss %xmm15, %xmm0 - [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 - ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 - ; asm: movss %xmm11, %xmm14 - [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 - - ; Convert float to int. - - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2 - - ; asm: cvttss2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd - ; asm: cvttss2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2 - - ; Min/max. - - ; asm: minss %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea - ; asm: minss %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5 - ; asm: maxss %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea - ; asm: maxss %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtss %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5 - ; asm: sqrtss %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00 - ; asm: roundss $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01 - ; asm: roundss $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02 - ; asm: roundss $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03 - ; asm: roundss $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 - - ; Load/Store - - ; asm: movss (%r14), %xmm5 - [-,%xmm5] v100 = load.f32 v3 ; bin: heap_oob f3 41 0f 10 2e - ; asm: movss (%rax), %xmm10 - [-,%xmm10] v101 = load.f32 v2 ; bin: heap_oob f3 44 0f 10 10 - ; asm: movss 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f32 v3+50 ; bin: heap_oob f3 41 0f 10 6e 32 - ; asm: movss -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f32 v2-50 ; bin: heap_oob f3 44 0f 10 50 ce - ; asm: movss 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f32 v3+10000 ; bin: heap_oob f3 41 0f 10 ae 00002710 - ; asm: movss -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f32 v2-10000 ; bin: heap_oob f3 44 0f 10 90 ffffd8f0 - - ; asm: movss %xmm5, (%r14) - [-] store.f32 v100, v3 ; bin: heap_oob f3 41 0f 11 2e - ; asm: movss %xmm10, (%rax) - [-] store.f32 v101, v2 ; bin: heap_oob f3 44 0f 11 10 - ; asm: movss %xmm5, (%r13) - [-] store.f32 v100, v4 ; bin: heap_oob f3 41 0f 11 6d 00 - ; asm: movss %xmm10, (%r13) - [-] store.f32 v101, v4 ; bin: heap_oob f3 45 0f 11 55 00 - ; asm: movss %xmm5, 50(%r14) - [-] store.f32 v100, v3+50 ; bin: heap_oob f3 41 0f 11 6e 32 - ; asm: movss %xmm10, -50(%rax) - [-] store.f32 v101, v2-50 ; bin: heap_oob f3 44 0f 11 50 ce - ; asm: movss %xmm5, 10000(%r14) - [-] store.f32 v100, v3+10000 ; bin: heap_oob f3 41 0f 11 ae 00002710 - ; asm: movss %xmm10, -10000(%rax) - [-] store.f32 v101, v2-10000 ; bin: heap_oob f3 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. - - ; asm: movss %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 44 0f 11 94 24 00000408 - - ; asm: movss 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - - ; asm: movss (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce - ; asm: 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2sdl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb - ; asm: cvtsi2sdl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6 - - ; asm: cvtsi2sdq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8 - ; asm: cvtsi2sdq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6 - - ; asm: cvtsd2ss %xmm10, %xmm5 - [-,%xmm5] v14 = fdemote.f32 v11 ; bin: f2 41 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm10 - [-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5 - - ; asm: movq %rax, %xmm5 - [-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8 - ; asm: movq %r14, %xmm10 - [-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6 - 
- ; asm: movq %xmm5, %rcx - [-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9 - ; asm: movq %xmm10, %rsi - [-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addsd %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea - ; asm: addsd %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 - - ; asm: subsd %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea - ; asm: subsd %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 - - ; asm: mulsd %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea - ; asm: mulsd %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 - - ; asm: divsd %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea - ; asm: divsd %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Convert float to int. - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2 - - ; asm: cvttsd2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd - ; asm: cvttsd2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea - ; asm: minsd %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5 - ; asm: maxsd %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea - ; asm: maxsd %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5 - ; asm: sqrtsd %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00 - ; asm: roundsd $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01 - ; asm: roundsd $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02 - ; asm: roundsd $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03 - ; asm: roundsd $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%r14), %xmm5 - [-,%xmm5] v100 = load.f64 v3 ; bin: heap_oob f2 41 0f 10 2e - ; asm: movsd (%rax), %xmm10 - [-,%xmm10] v101 = load.f64 v2 ; bin: heap_oob f2 44 0f 10 10 - ; asm: movsd 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f64 v3+50 ; bin: heap_oob f2 41 0f 10 6e 32 - ; asm: movsd -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f64 v2-50 ; bin: heap_oob f2 44 0f 10 50 ce - ; asm: movsd 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f64 v3+10000 ; bin: heap_oob f2 41 0f 10 ae 00002710 - ; asm: movsd -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f64 v2-10000 ; bin: heap_oob f2 44 0f 10 90 ffffd8f0 - - ; asm: movsd %xmm5, (%r14) - [-] store.f64 v100, v3 ; bin: heap_oob f2 41 0f 11 2e - ; asm: movsd %xmm10, (%rax) - [-] store.f64 v101, v2 ; bin: heap_oob f2 44 0f 11 10 - ; asm: movsd %xmm5, (%r13) - [-] store.f64 v100, v4 ; bin: heap_oob f2 41 0f 11 6d 00 - ; asm: movsd %xmm10, (%r13) - [-] store.f64 v101, v4 ; bin: heap_oob f2 45 0f 11 55 00 - ; asm: movsd %xmm5, 50(%r14) - [-] store.f64 v100, v3+50 ; bin: heap_oob f2 41 0f 11 6e 32 - ; asm: movsd %xmm10, -50(%rax) - [-] store.f64 v101, v2-50 ; bin: heap_oob f2 44 0f 11 50 ce - ; asm: movsd %xmm5, 10000(%r14) - [-] store.f64 v100, v3+10000 ; bin: heap_oob f2 41 0f 11 ae 00002710 - ; asm: movsd %xmm10, -10000(%rax) - [-] store.f64 v101, v2-10000 ; bin: heap_oob f2 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 44 0f 11 94 24 00000408 - - ; asm: movsd 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f2 44 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - ; asm: movsd (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18 - ; asm: movsd 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32 - ; asm: movsd -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce - ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710 - ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351 ; bin: heap_oob f2 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0 - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 - trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - 
; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %r10b - [-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2 - ; asm: setae %r10b - [-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2 - ; asm: setb %r14b - [-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6 - ; asm: setbe %r14b - [-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif deleted file mode 100644 index 4f2c650592..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif +++ /dev/null @@ -1,83 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-pic.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - gv1 = symbol colocated %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Colocated functions. - - ; asm: call foo - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rax - [-,%rax] v0 = func_addr.i64 fn1 ; bin: 48 8d 05 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v1 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v2 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v0() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v1() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v2() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. 
- - ; asm: call foo@PLT - call fn0() ; bin: stk_ovf e8 CallPLTRel4(%foo-4) 00000000 - - ; asm: mov 0x0(%rip), %rax - [-,%rax] v100 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v101 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v102 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v100() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v101() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v102() ; bin: stk_ovf 41 ff d2 - - ; asm: mov 0x0(%rip), %rcx - [-,%rcx] v3 = symbol_value.i64 gv0 ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v4 = symbol_value.i64 gv0 ; bin: 48 8b 35 GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v5 = symbol_value.i64 gv0 ; bin: 4c 8b 15 GOTPCRel4(%some_gv-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v6 = symbol_value.i64 gv1 ; bin: 48 8d 0d PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v7 = symbol_value.i64 gv1 ; bin: 48 8d 35 PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v8 = symbol_value.i64 gv1 ; bin: 4c 8d 15 PCRel4(%some_gv-4) 00000000 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif deleted file mode 100644 index c5e1cf5099..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ /dev/null @@ -1,1692 +0,0 @@ -; binary emission of x86-64 code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movq $0x01020304f1f2f3f4, %rcx - [-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4 - ; asm: movq $0x11020304f1f2f3f4, %rsi - [-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4 - ; asm: movq $0x21020304f1f2f3f4, %r10 - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - ; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant. - [-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant. - [-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - ; asm: movb $1, %sil - [-,%r10] v9008 = bconst.b1 true ; bin: 41 ba 00000001 - - ; Integer Register Operations. - - ; asm: notq %rcx - [-,%rcx] v4000 = bnot v1 ; bin: 48 f7 d1 - ; asm: notq %rsi - [-,%rsi] v4001 = bnot v2 ; bin: 48 f7 d6 - ; asm: notq %r10 - [-,%r10] v4002 = bnot v3 ; bin: 49 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addq %rsi, %rcx - [-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1 - ; asm: addq %r10, %rsi - [-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6 - ; asm: addq %rcx, %r10 - [-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca - - ; asm: subq %rsi, %rcx - [-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1 - ; asm: subq %r10, %rsi - [-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6 - ; asm: subq %rcx, %r10 - [-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca - - ; asm: andq %rsi, %rcx - [-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1 - ; asm: andq %r10, %rsi - [-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6 - ; asm: andq %rcx, %r10 - [-,%r10] v32 = band v3, v1 ; bin: 49 21 ca - - ; asm: orq %rsi, %rcx - [-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1 - ; asm: orq %r10, %rsi - [-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6 - ; asm: orq %rcx, %r10 - [-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca - - ; asm: xorq %rsi, %rcx - [-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1 - ; asm: xorq %r10, %rsi - [-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6 - ; asm: xorq %rcx, %r10 - [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca - - ; asm: shlq %cl, %rsi - [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 - ; asm: shlq %cl, %r10 - [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 - ; asm: sarq %cl, %rsi - [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe - ; asm: sarq %cl, %r10 - [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa - ; asm: shrq %cl, %rsi - [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee - ; asm: shrq %cl, %r10 - [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea - - ; asm: rolq %cl, %rsi - [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 - ; asm: rolq %cl, %r10 - [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 - ; asm: rorq %cl, %rsi - [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce - ; asm: rorq %cl, %r10 - [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addq $-100000, %rcx - [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960 - ; asm: addq $100000, %rsi - [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0 - ; asm: addq $0x7fffffff, %r10 - [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff - ; asm: addq $100, %r8 - [-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64 - ; asm: addq $-100, %r14 - [-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c - - ; asm: andq $-100000, %rcx - [-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960 - ; asm: andq $100000, %rsi - [-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0 - ; asm: andq $0x7fffffff, %r10 - [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff - ; asm: andq $100, %r8 - [-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64 - ; asm: andq $-100, %r14 - [-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c - - ; asm: orq $-100000, %rcx - [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960 - ; asm: orq $100000, %rsi - [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0 - ; asm: orq $0x7fffffff, %r10 - [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff - ; asm: orq $100, %r8 - [-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64 - ; asm: orq $-100, %r14 - [-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c - ; asm: ret - - ; asm: xorq $-100000, %rcx - [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960 - ; asm: xorq $100000, %rsi - [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0 - ; asm: xorq $0x7fffffff, %r10 - [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff - ; asm: xorq $100, %r8 - [-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64 - ; asm: xorq $-100, %r14 - [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c - - ; Register copies. - - ; asm: movq %rsi, %rcx - [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 - ; asm: movq %r10, %rsi - [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 - ; asm: movq %rcx, %r10 - [-,%r10] v112 = copy v1 ; bin: 49 89 ca - - ; Copy Special - ; asm: movq %rsp, %rbp - copy_special %rsp -> %rbp ; bin: 48 89 e5 - ; asm: movq %r10, %r11 - copy_special %r10 -> %r11 ; bin: 4d 89 d3 - ; asm: movq %rsp, %r11 - copy_special %rsp -> %r11 ; bin: 49 89 e3 - ; asm: movq %r10, %rsp - copy_special %r10 -> %rsp ; bin: 4c 89 d4 - - ; Copy to SSA - - ; asm: movq %rax, %r15 - [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 - ; asm: movq %r15, %rax - [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 - ; asm: movq %rdi, %rsi - [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe - ; asm: movq %r11, %r14 - [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de - - ; asm: movl %eax, %r15d - [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 - ; asm: movl %r15d, %eax - [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 - ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. - [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe - ; asm: movl %r11, %r14 - [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movq %rcx, (%r10) - store v1, v3 ; bin: heap_oob 49 89 0a - ; asm: movq %r10, (%rcx) - store v3, v1 ; bin: heap_oob 4c 89 11 - ; asm: movl %ecx, (%r10) - istore32 v1, v3 ; bin: heap_oob 41 89 0a - ; asm: movl %r10d, (%rcx) - istore32 v3, v1 ; bin: heap_oob 44 89 11 - ; asm: movw %cx, (%r10) - istore16 v1, v3 ; bin: heap_oob 66 41 89 0a - ; asm: movw %r10w, (%rcx) - istore16 v3, v1 ; bin: heap_oob 66 44 89 11 - ; asm: movb %cl, (%r10) - istore8 v1, v3 ; bin: heap_oob 41 88 0a - ; asm: movb %r10b, (%rcx) - istore8 v3, v1 ; bin: heap_oob 44 88 11 - - ; asm: movq (%rcx), %r14 - [-,%r14] v120 = load.i64 v1 ; bin: heap_oob 4c 8b 31 - ; asm: movq (%r10), %rdx - [-,%rdx] v121 = load.i64 v3 ; bin: heap_oob 49 8b 12 - ; asm: movl (%rcx), %r14d - [-,%r14] v122 = uload32.i64 v1 ; bin: heap_oob 44 8b 31 - ; asm: movl (%r10), %edx - [-,%rdx] v123 = uload32.i64 v3 ; bin: heap_oob 41 8b 12 - ; asm: movslq (%rcx), %r14 - [-,%r14] v124 = sload32.i64 v1 ; bin: heap_oob 4c 63 31 - ; asm: movslq (%r10), %rdx - [-,%rdx] v125 = sload32.i64 v3 ; bin: heap_oob 49 63 12 - ; asm: movzwq (%rcx), %r14 - [-,%r14] v126 = uload16.i64 v1 ; bin: heap_oob 4c 0f b7 31 - ; asm: movzwq (%r10), %rdx - [-,%rdx] v127 = uload16.i64 v3 ; bin: heap_oob 49 0f b7 12 - ; asm: movswq (%rcx), %r14 - [-,%r14] v128 = sload16.i64 v1 ; bin: heap_oob 4c 0f bf 31 - ; asm: movswq (%r10), %rdx - [-,%rdx] v129 = sload16.i64 v3 ; bin: heap_oob 49 0f bf 12 - ; asm: movzbq (%rcx), %r14 - [-,%r14] v130 = uload8.i64 v1 ; bin: heap_oob 4c 0f b6 31 - ; asm: movzbq (%r10), %rdx - [-,%rdx] v131 = uload8.i64 v3 ; bin: heap_oob 49 0f b6 12 - ; asm: movsbq (%rcx), %r14 - [-,%r14] v132 = sload8.i64 v1 ; bin: heap_oob 4c 0f be 31 - ; asm: movsbq (%r10), %rdx - [-,%rdx] v133 = sload8.i64 v3 ; bin: heap_oob 49 0f be 12 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movq %rcx, 100(%r10) - store v1, v3+100 ; bin: heap_oob 49 89 4a 64 - ; asm: movq %r10, -100(%rcx) - store v3, v1-100 ; bin: heap_oob 4c 89 51 9c - ; asm: movl %ecx, 100(%r10) - istore32 v1, v3+100 ; bin: heap_oob 41 89 4a 64 - ; asm: movl %r10d, -100(%rcx) - istore32 v3, v1-100 ; bin: heap_oob 44 89 51 9c - ; asm: movw %cx, 100(%r10) - istore16 v1, v3+100 ; bin: heap_oob 66 41 89 4a 64 - ; asm: movw %r10w, -100(%rcx) - istore16 v3, v1-100 ; bin: heap_oob 66 44 89 51 9c - ; asm: movb %cl, 100(%r10) - istore8 v1, v3+100 ; bin: heap_oob 41 88 4a 64 - ; asm: movb %r10b, 100(%rcx) - istore8 v3, v1+100 ; bin: heap_oob 44 88 51 64 - - ; asm: movq 50(%rcx), %r10 - [-,%r10] v140 = load.i64 v1+50 ; bin: heap_oob 4c 8b 51 32 - ; asm: movq -50(%r10), %rdx - [-,%rdx] v141 = load.i64 v3-50 ; bin: heap_oob 49 8b 52 ce - ; asm: movl 50(%rcx), %edi - [-,%rdi] v142 = uload32.i64 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v143 = uload32.i64 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movslq 50(%rcx), %rdi - [-,%rdi] v144 = sload32.i64 v1+50 ; bin: heap_oob 48 63 79 32 - ; asm: movslq -50(%rsi), %rdx - [-,%rdx] v145 = sload32.i64 v2-50 ; bin: heap_oob 48 63 56 ce - ; asm: movzwq 50(%rcx), %rdi - [-,%rdi] v146 = uload16.i64 v1+50 ; bin: heap_oob 48 0f b7 79 32 - ; asm: movzwq -50(%rsi), %rdx - [-,%rdx] v147 = uload16.i64 v2-50 ; bin: heap_oob 48 0f b7 56 ce - ; asm: movswq 50(%rcx), %rdi - [-,%rdi] v148 = sload16.i64 v1+50 ; bin: heap_oob 48 0f bf 79 32 - ; asm: movswq -50(%rsi), %rdx - [-,%rdx] v149 = sload16.i64 v2-50 ; bin: heap_oob 48 0f bf 56 ce - ; asm: movzbq 50(%rcx), %rdi - [-,%rdi] v150 = uload8.i64 v1+50 ; bin: heap_oob 48 0f b6 79 32 - ; asm: movzbq -50(%rsi), %rdx - [-,%rdx] v151 = uload8.i64 v2-50 ; bin: heap_oob 48 0f b6 56 ce - ; asm: movsbq 50(%rcx), %rdi - [-,%rdi] v152 = sload8.i64 v1+50 ; bin: heap_oob 48 0f be 79 32 - ; asm: movsbq -50(%rsi), %rdx - [-,%rdx] v153 = sload8.i64 v2-50 ; bin: heap_oob 48 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. 
- - ; asm: movq %rcx, 10000(%r10) - store v1, v3+10000 ; bin: heap_oob 49 89 8a 00002710 - ; asm: movq %r10, -10000(%rcx) - store v3, v1-10000 ; bin: heap_oob 4c 89 91 ffffd8f0 - ; asm: movl %ecx, 10000(%rsi) - istore32 v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%rcx) - istore32 v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%rsi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%rcx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%rsi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - ; asm: movb %sil, 10000(%rcx) - istore8 v2, v1+10000 ; bin: heap_oob 40 88 b1 00002710 - - ; asm: movq 50000(%rcx), %r10 - [-,%r10] v160 = load.i64 v1+50000 ; bin: heap_oob 4c 8b 91 0000c350 - ; asm: movq -50000(%r10), %rdx - [-,%rdx] v161 = load.i64 v3-50000 ; bin: heap_oob 49 8b 92 ffff3cb0 - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v162 = uload32.i64 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v163 = uload32.i64 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movslq 50000(%rcx), %rdi - [-,%rdi] v164 = sload32.i64 v1+50000 ; bin: heap_oob 48 63 b9 0000c350 - ; asm: movslq -50000(%rsi), %rdx - [-,%rdx] v165 = sload32.i64 v2-50000 ; bin: heap_oob 48 63 96 ffff3cb0 - ; asm: movzwq 50000(%rcx), %rdi - [-,%rdi] v166 = uload16.i64 v1+50000 ; bin: heap_oob 48 0f b7 b9 0000c350 - ; asm: movzwq -50000(%rsi), %rdx - [-,%rdx] v167 = uload16.i64 v2-50000 ; bin: heap_oob 48 0f b7 96 ffff3cb0 - ; asm: movswq 50000(%rcx), %rdi - [-,%rdi] v168 = sload16.i64 v1+50000 ; bin: heap_oob 48 0f bf b9 0000c350 - ; asm: movswq -50000(%rsi), %rdx - [-,%rdx] v169 = sload16.i64 v2-50000 ; bin: heap_oob 48 0f bf 96 ffff3cb0 - ; asm: movzbq 50000(%rcx), %rdi - [-,%rdi] v170 = uload8.i64 v1+50000 ; bin: heap_oob 48 0f b6 b9 0000c350 - ; asm: movzbq -50000(%rsi), %rdx - [-,%rdx] v171 = uload8.i64 v2-50000 ; bin: heap_oob 48 0f b6 96 ffff3cb0 - ; asm: movsbq 50000(%rcx), %rdi - [-,%rdi] v172 = sload8.i64 v1+50000 ; bin: heap_oob 48 0f be b9 0000c350 - ; asm: movsbq -50000(%rsi), %rdx - [-,%rdx] v173 = sload8.i64 v2-50000 ; bin: heap_oob 48 0f be 96 ffff3cb0 - - - ; More arithmetic. 
- - ; asm: imulq %rsi, %rcx - [-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce - ; asm: imulq %r10, %rsi - [-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2 - ; asm: imulq %rcx, %r10 - [-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1 - - [-,%rax] v190 = iconst.i64 1 - [-,%rdx] v191 = iconst.i64 2 - ; asm: idivq %rcx - [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1 ; bin: int_divz 48 f7 f9 - ; asm: idivq %rsi - [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2 ; bin: int_divz 48 f7 fe - ; asm: idivq %r10 - [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3 ; bin: int_divz 49 f7 fa - ; asm: divq %rcx - [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1 ; bin: int_divz 48 f7 f1 - ; asm: divq %rsi - [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2 ; bin: int_divz 48 f7 f6 - ; asm: divq %r10 - [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: int_divz 49 f7 f2 - - ; double-length multiply instructions, 64 bit - [-,%rax] v1001 = iconst.i64 1 - [-,%r15] v1002 = iconst.i64 2 - ; asm: mulq %r15 - [-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7 - ; asm: imulq %r15 - [-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef - - ; double-length multiply instructions, 32 bit - [-,%rax] v1011 = iconst.i32 1 - [-,%r15] v1012 = iconst.i32 2 - [-,%rcx] v1017 = iconst.i32 3 - ; asm: mull %r15d - [-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7 - ; asm: imull %r15d - [-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef - - ; asm: mull %ecx - [-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1 - ; asm: imull %ecx - [-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9 - - ; Bit-counting instructions. - - ; asm: popcntq %rsi, %rcx - [-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce - ; asm: popcntq %r10, %rsi - [-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2 - ; asm: popcntq %rcx, %r10 - [-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1 - - ; asm: lzcntq %rsi, %rcx - [-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce - ; asm: lzcntq %r10, %rsi - [-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2 - ; asm: lzcntq %rcx, %r10 - [-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1 - - ; asm: tzcntq %rsi, %rcx - [-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce - ; asm: tzcntq %r10, %rsi - [-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2 - ; asm: tzcntq %rcx, %r10 - [-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1 - - ; Integer comparisons. 
- - ; asm: cmpq %rsi, %rcx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 48 39 f1 0f 94 c3 - ; asm: cmpq %r10, %rsi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 4c 39 d6 0f 94 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 48 39 f1 0f 95 c3 - ; asm: cmpq %r10, %rsi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 4c 39 d6 0f 95 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 48 39 f1 0f 9c c3 - ; asm: cmpq %r10, %rsi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 4c 39 d6 0f 9c c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 48 39 f1 0f 9d c3 - ; asm: cmpq %r10, %rsi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 4c 39 d6 0f 9d c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 48 39 f1 0f 9f c3 - ; asm: cmpq %r10, %rsi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 4c 39 d6 0f 9f c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 48 39 f1 0f 9e c3 - ; asm: cmpq %r10, %rsi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 4c 39 d6 0f 9e c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 48 39 f1 0f 92 c3 - ; asm: cmpq %r10, %rsi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 4c 39 d6 0f 92 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 48 39 f1 0f 93 c3 - ; asm: cmpq %r10, %rsi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 4c 39 d6 0f 93 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 48 39 f1 0f 97 c3 - ; asm: cmpq %r10, %rsi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 4c 39 d6 0f 97 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 48 39 f1 0f 96 c3 - ; asm: cmpq %r10, %rsi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2 - - ; asm: cmpq $37, %rcx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3 - - ; asm: cmpq $100000, %rcx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbq %bl, %rcx - [-,%rcx] v350 = bint.i64 v300 ; bin: 0f b6 cb - ; asm: movzbq %dl, %rsi - [-,%rsi] v351 = bint.i64 v301 ; bin: 0f b6 f2 - - ; Colocated functions. - - ; asm: call bar - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v400 = func_addr.i64 fn1 ; bin: 48 8d 0d PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v401 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v402 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rcx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v402() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. Note that there is no non-colocated non-PIC call. 
- - ; asm: movabsq $0, %rcx - [-,%rcx] v410 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %rsi - [-,%rsi] v411 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %r10 - [-,%r10] v412 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 - - ; asm: call *%rcx - call_indirect sig0, v410() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v411() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v412() ; bin: stk_ovf 41 ff d2 - - ; asm: movabsq $-1, %rcx - [-,%rcx] v450 = symbol_value.i64 gv0 ; bin: 48 b9 Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %rsi - [-,%rsi] v451 = symbol_value.i64 gv0 ; bin: 48 be Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %r10 - [-,%r10] v452 = symbol_value.i64 gv0 ; bin: 49 ba Abs8(%some_gv) 0000000000000000 - - ; Spill / Fill. - - ; asm: movq %rcx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq %rsi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 48 89 b4 24 00000408 - ; asm: movq %r10, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 4c 89 94 24 00000408 - - ; asm: movq 1032(%rsp), %rcx - [-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408 - ; asm: movq 1032(%rsp), %rsi - [-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408 - ; asm: movq 1032(%rsp), %r10 - [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 - - ; asm: movq %rcx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq 1032(%rsp), %rcx - regfill v1, ss1 -> %rcx ; bin: 48 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushq %rcx - x86_push v1 ; bin: stk_ovf 51 - ; asm: pushq %r10 - x86_push v3 ; bin: stk_ovf 41 52 - ; asm: popq %rcx - [-,%rcx] v513 = x86_pop.i64 ; bin: 59 - ; asm: popq %r10 - [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a - - ; Adjust Stack Pointer Up - ; asm: addq $64, %rsp - adjust_sp_up_imm 64 ; bin: 48 83 c4 40 - ; asm: addq $-64, %rsp - adjust_sp_up_imm -64 ; bin: 48 83 c4 c0 - ; asm: addq $1024, %rsp - adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400 - ; asm: addq $-1024, %rsp - adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00 - ; asm: addq $2147483647, %rsp - adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff - ; asm: addq $-2147483648, %rsp - adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subq %rcx, %rsp - adjust_sp_down v1 ; bin: 48 29 cc - ; asm: subq %r10, %rsp - adjust_sp_down v3 ; bin: 4c 29 d4 - ; asm: subq $64, %rsp - adjust_sp_down_imm 64 ; bin: 48 83 ec 40 - ; asm: subq $-64, %rsp - adjust_sp_down_imm -64 ; bin: 48 83 ec c0 - ; asm: subq $1024, %rsp - adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400 - ; asm: subq $-1024, %rsp - adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00 - ; asm: subq $2147483647, %rsp - adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff - ; asm: subq $-2147483648, %rsp - adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000 - - ; Shift immediates - ; asm: shlq $12, %rsi - [-,%rsi] v515 = ishl_imm v2, 12 ; bin: 48 c1 e6 0c - ; asm: shlq $13, %r8 - [-,%r8] v516 = ishl_imm v4, 13 ; bin: 49 c1 e0 0d - ; asm: sarq $32, %rsi - [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 - ; asm: sarq $33, %r8 - [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 - ; asm: shrq $62, %rsi - [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e - ; asm: shrq $63, %r8 - [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f - - - ; Rotate immediates - ; asm: rolq $12, %rsi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: 48 c1 c6 0c - 
; asm: rolq $13, %r8 - [-,%r8] v5102 = rotl_imm v4, 13 ; bin: 49 c1 c0 0d - ; asm: rorq $32, %rsi - [-,%rsi] v5103 = rotr_imm v2, 32 ; bin: 48 c1 ce 20 - ; asm: rorq $33, %r8 - [-,%r8] v5104 = rotr_imm v4, 33 ; bin: 49 c1 c8 21 - - - ; Load Complex - [-,%rax] v521 = iconst.i64 1 - [-,%rbx] v522 = iconst.i64 1 - [-,%rdi] v523 = iconst.i32 1 - [-,%rsi] v524 = iconst.i32 1 - ; asm: movq (%rax,%rbx,1), %rcx - [-,%rcx] v525 = load_complex.i64 v521+v522 ; bin: heap_oob 48 8b 0c 18 - ; asm: movl (%rax,%rbx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movq 1(%rax,%rbx,1), %rcx - [-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01 - ; asm: movl 1(%rax,%rbx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%rax,%rbx,1),%rcx - [-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000 - ; asm: mov 0x100000(%rax,%rbx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbq (%rax,%rbx,1),%rcx - [-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18 - ; asm: movzbl (%rax,%rbx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbq (%rax,%rbx,1),%rcx - [-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18 - ; asm: movsbl (%rax,%rbx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwq (%rax,%rbx,1),%rcx - [-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18 - ; asm: movzwl (%rax,%rbx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswq (%rax,%rbx,1),%rcx - [-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18 - ; asm: movswl (%rax,%rbx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - ; asm: mov (%rax,%rbx,1),%ecx - [-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movslq (%rax,%rbx,1),%rcx - [-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18 - [-,%r13] v550 = iconst.i64 1 - [-,%r14] v551 = iconst.i64 1 - ; asm: mov 0x0(%r13,%r14,1),%r12d - [-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00 - - ; Store Complex - [-,%rcx] v600 = iconst.i64 1 - [-,%rcx] v601 = iconst.i32 1 - [-,%r10] v602 = iconst.i64 1 - [-,%r11] v603 = iconst.i32 1 - ; asm: mov %rcx,(%rax,%rbx,1) - store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18 - ; asm: mov %rcx,0x1(%rax,%rbx,1) - store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01 - ; asm: mov %rcx,0x100000(%rax,%rbx,1) - store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%rax,%rbx,1) - store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%rax,%rbx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %r10w,(%rax,%rbx,1) - istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18 - ; asm: mov %r11w,(%rax,%rbx,1) - istore16_complex v603, v521+v522 ; bin: heap_oob 
66 44 89 1c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; asm: testq %rcx, %rcx - ; asm: je block1 - brz v1, block1 ; bin: 48 85 c9 74 1b - fallthrough block3 - -block3: - ; asm: testq %rsi, %rsi - ; asm: je block1 - brz v2, block1 ; bin: 48 85 f6 74 16 - fallthrough block4 - -block4: - ; asm: testq %r10, %r10 - ; asm: je block1 - brz v3, block1 ; bin: 4d 85 d2 74 11 - fallthrough block5 - -block5: - ; asm: testq %rcx, %rcx - ; asm: jne block1 - brnz v1, block1 ; bin: 48 85 c9 75 0c - fallthrough block6 - -block6: - ; asm: testq %rsi, %rsi - ; asm: jne block1 - brnz v2, block1 ; bin: 48 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testq %r10, %r10 - ; asm: jne block1 - brnz v3, block1 ; bin: 4d 85 d2 75 02 - - ; asm: jmp block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - return ; bin: c3 - - ; asm: block2: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notq %rcx - [-,%rcx] v5000 = bnot v1 ; bin: 48 f7 d1 - jump block1 ; bin: eb fa -} - -; CPU flag instructions. -function %cpu_flags_I64() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%r10] v2 = iconst.i64 2 - jump block1 - -block1: - ; asm: cmpq %r10, %rcx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 4c 39 d1 - ; asm: cmpq %rcx, %r10 - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 49 39 ca - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 f8 - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f6 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f4 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f2 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f0 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e ee - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ec - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ea - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 e8 - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e6 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl - [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %r10b - [-,%r10] v24 = trueif sgt v11 ; bin: 41 0f 9f c2 - ; asm: setle %r10b - [-,%r10] v25 = trueif sle v11 ; bin: 41 0f 9e c2 - ; asm: setb %r14b - [-,%r14] v26 = trueif ult v11 ; bin: 41 0f 92 c6 - ; asm: setae %r14b - [-,%r14] v27 = trueif uge v11 ; bin: 41 0f 93 c6 - ; asm: seta %r11b - [-,%r11] v28 = trueif ugt v11 ; bin: 41 0f 97 c3 - ; asm: setbe %r11b - [-,%r11] v29 = trueif ule v11 ; bin: 41 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. 
- ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Debug trap. - debugtrap ; bin: cc - - ; Stack check. - ; asm: cmpq %rsp, %rcx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 48 39 e1 - ; asm: cmpq %rsp, %r10 - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 49 39 e2 - - ; asm: cmpq $-100, %rcx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 48 83 f9 9c - ; asm: cmpq $100, %r10 - [-,%rflags] v523 = ifcmp_imm v2, 100 ; bin: 49 83 fa 64 - - ; asm: cmpq $-10000, %rcx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0 - ; asm: cmpq $10000, %r10 - [-,%rflags] v525 = ifcmp_imm v2, 10000 ; bin: 49 81 fa 00002710 - - - return -} - -; Test for the encoding of outgoing_arg stack slots. -function %outargs() { - ss0 = incoming_arg 16, offset -16 - ss1 = outgoing_arg 8, offset 8 - ss2 = outgoing_arg 8, offset 0 - -block0: - [-,%rcx] v1 = iconst.i64 1 - - ; asm: movq %rcx, 8(%rsp) - [-,ss1] v10 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000008 - ; asm: movq %rcx, (%rsp) - [-,ss2] v11 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000000 - - return -} - -; Tests for i32 instructions in 64-bit mode. -; -; Note that many i32 instructions can be encoded both with and without a REX -; prefix if they only use the low 8 registers. Here, we are testing the REX -; encodings which are chosen by default. Switching to non-REX encodings should -; be done by an instruction shrinking pass. -function %I32() { - sig0 = () - fn0 = %foo() - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movl $0x01020304, %ecx - [-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: b9 01020304 - ; asm: movl $0x11020304, %esi - [-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: be 11020304 - ; asm: movl $0x21020304, %r10d - [-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304 - ; asm: movl $0xff001122, %r8d - [-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movl $0x88001122, %r14d - [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movl (%rcx), %edi - [-,%rdi] v10 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%rsi), %edx - [-,%rdx] v11 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%rcx), %edi - [-,%rdi] v12 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%rsi), %edx - [-,%rdx] v13 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%rcx), %edi - [-,%rdi] v14 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%rsi), %edx - [-,%rdx] v15 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%rcx), %edi - [-,%rdi] v16 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%rsi), %edx - [-,%rdx] v17 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%rcx), %edi - [-,%rdi] v18 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%rsi), %edx - [-,%rdx] v19 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. - - ; asm: movl 50(%rcx), %edi - [-,%rdi] v20 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v21 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%rcx), %edi - [-,%rdi] v22 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%rsi), %edx - [-,%rdx] v23 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%rcx), %edi - [-,%rdi] v24 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%rsi), %edx - [-,%rdx] v25 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%rcx), %edi - [-,%rdi] v26 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%rsi), %edx - [-,%rdx] v27 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%rcx), %edi - [-,%rdi] v28 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%rsi), %edx - [-,%rdx] v29 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. - - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v30 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v31 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%rcx), %edi - [-,%rdi] v32 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%rsi), %edx - [-,%rdx] v33 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%rcx), %edi - [-,%rdi] v34 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%rsi), %edx - [-,%rdx] v35 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%rcx), %edi - [-,%rdi] v36 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%rsi), %edx - [-,%rdx] v37 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%rcx), %edi - [-,%rdi] v38 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%rsi), %edx - [-,%rdx] v39 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Integer Register Operations. - - ; asm: notl %ecx - [-,%rcx] v4000 = bnot v1 ; bin: f7 d1 - ; asm: notl %esi - [-,%rsi] v4001 = bnot v2 ; bin: f7 d6 - ; asm: notl %r10d - [-,%r10] v4002 = bnot v3 ; bin: 41 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addl %esi, %ecx - [-,%rcx] v40 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %r10d, %esi - [-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6 - ; asm: addl %ecx, %r10d - [-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca - - ; asm: subl %esi, %ecx - [-,%rcx] v50 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %r10d, %esi - [-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6 - ; asm: subl %ecx, %r10d - [-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca - - ; asm: andl %esi, %ecx - [-,%rcx] v60 = band v1, v2 ; bin: 21 f1 - ; asm: andl %r10d, %esi - [-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6 - ; asm: andl %ecx, %r10d - [-,%r10] v62 = band v3, v1 ; bin: 41 21 ca - - ; asm: orl %esi, %ecx - [-,%rcx] v70 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %r10d, %esi - [-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6 - ; asm: orl %ecx, %r10d - [-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca - - ; asm: xorl %esi, %ecx - [-,%rcx] v80 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %r10d, %esi - [-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6 - ; asm: xorl %ecx, %r10d - [-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca - - ; asm: shll %cl, %esi - [-,%rsi] v90 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %r10d - [-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2 - ; asm: sarl %cl, %esi - [-,%rsi] v92 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %r10d - [-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa - ; asm: shrl %cl, %esi - [-,%rsi] v94 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %r10d - [-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea - - ; asm: roll %cl, %esi - [-,%rsi] v96 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %r10d - [-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2 - ; asm: rorl %cl, %esi - [-,%rsi] v98 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %r10d - [-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addl $-100000, %ecx - [-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 81 c1 fffe7960 - ; asm: addl $100000, %esi - [-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 81 c6 000186a0 - ; asm: addl $0x7fffffff, %r10d - [-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff - ; asm: addl $100, %r8d - [-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64 - ; asm: addl $-100, %r14d - [-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c - - ; asm: andl $-100000, %ecx - [-,%rcx] v110 = band_imm v1, -100000 ; bin: 81 e1 fffe7960 - ; asm: andl $100000, %esi - [-,%rsi] v111 = band_imm v2, 100000 ; bin: 81 e6 000186a0 - ; asm: andl $0x7fffffff, %r10d - [-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff - ; asm: andl $100, %r8d - [-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64 - ; asm: andl $-100, %r14d - [-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c - - ; asm: orl $-100000, %ecx - [-,%rcx] v120 = bor_imm v1, -100000 ; bin: 81 c9 fffe7960 - ; asm: orl $100000, %esi - [-,%rsi] v121 = bor_imm v2, 100000 ; bin: 81 ce 000186a0 - ; asm: orl $0x7fffffff, %r10d - [-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff - ; asm: orl $100, %r8d - [-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64 - ; asm: orl $-100, %r14d - [-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c - ; asm: ret - - ; asm: xorl $-100000, %ecx - [-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 81 f1 fffe7960 - ; asm: xorl $100000, %esi - [-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 81 f6 000186a0 - ; asm: xorl $0x7fffffff, %r10d - [-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff - ; asm: xorl $100, %r8d - [-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64 - ; asm: xorl $-100, %r14d - [-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c - - ; Register copies. - - ; asm: movl %esi, %ecx - [-,%rcx] v140 = copy v2 ; bin: 89 f1 - ; asm: movl %r10d, %esi - [-,%rsi] v141 = copy v3 ; bin: 44 89 d6 - ; asm: movl %ecx, %r10d - [-,%r10] v142 = copy v1 ; bin: 41 89 ca - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v150 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %r10d, %esi - [-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2 - ; asm: imull %ecx, %r10d - [-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1 - - [-,%rax] v160 = iconst.i32 1 - [-,%rdx] v161 = iconst.i32 2 - ; asm: idivl %ecx - [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2 ; bin: int_divz f7 fe - ; asm: idivl %r10d - [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3 ; bin: int_divz 41 f7 fa - ; asm: divl %ecx - [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2 ; bin: int_divz f7 f6 - ; asm: divl %r10d - [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3 ; bin: int_divz 41 f7 f2 - - ; Bit-counting instructions. 
- - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %r10d, %esi - [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 - ; asm: popcntl %ecx, %r10d - [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v203 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %r10d, %esi - [-,%rsi] v204 = clz v3 ; bin: f3 41 0f bd f2 - ; asm: lzcntl %ecx, %r10d - [-,%r10] v205 = clz v1 ; bin: f3 44 0f bd d1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v206 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %r10d, %esi - [-,%rsi] v207 = ctz v3 ; bin: f3 41 0f bc f2 - ; asm: tzcntl %ecx, %r10d - [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 - - ; Integer comparisons. - - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %r10d, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 44 39 d6 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %r10d, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 44 39 d6 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %r10d, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 44 39 d6 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %r10d, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 44 39 d6 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %r10d, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 44 39 d6 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %r10d, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 44 39 d6 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %r10d, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 44 39 d6 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %r10d, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 44 39 d6 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %r10d, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 44 39 d6 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %r10d, %esi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2 - - ; asm: cmpl $37, %ecx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3 - - ; asm: cmpl $100000, %ecx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; Spill / Fill. 
- - ; asm: movl %ecx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - ; asm: movl %r10d, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 44 89 94 24 00000408 - - ; asm: movl 1032(%rsp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%rsp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - ; asm: movl 1032(%rsp), %r10d - [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 - - ; asm: movl %ecx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%rsp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; asm: cmpl %esi, %ecx - [-,%rflags] v520 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %r10d, %esi - [-,%rflags] v521 = ifcmp v2, v3 ; bin: 44 39 d6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %r10d - [-,%rflags] v523 = ifcmp_imm v3, 100 ; bin: 41 83 fa 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %r10d - [-,%rflags] v525 = ifcmp_imm v3, 10000 ; bin: 41 81 fa 00002710 - - ; asm: shll $2, %esi - [-,%rsi] v526 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: shll $12, %r10d - [-,%r10] v527 = ishl_imm v3, 12 ; bin: 41 c1 e2 0c - ; asm: sarl $5, %esi - [-,%rsi] v529 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: sarl $32, %r10d - [-,%r10] v530 = sshr_imm v3, 32 ; bin: 41 c1 fa 20 - ; asm: shrl $8, %esi - [-,%rsi] v532 = ushr_imm v2, 8 ; bin: c1 ee 08 - ; asm: shrl $31, %r10d - [-,%r10] v533 = ushr_imm v3, 31 ; bin: 41 c1 ea 1f - - ; asm: testl %ecx, %ecx - ; asm: je block1x - brz v1, block1 ; bin: 85 c9 74 18 - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1x - brz v2, block1 ; bin: 85 f6 74 14 - fallthrough block4 - -block4: - ; asm: testl %r10d, %r10d - ; asm: je block1x - brz v3, block1 ; bin: 45 85 d2 74 0f - fallthrough block5 - -block5: - ; asm: testl %ecx, %ecx - ; asm: jne block1x - brnz v1, block1 ; bin: 85 c9 75 0b - fallthrough block6 - -block6: - ; asm: testl %esi, %esi - ; asm: jne block1x - brnz v2, block1 ; bin: 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testl %r10d, %r10d - ; asm: jne block1x - brnz v3, block1 ; bin: 45 85 d2 75 02 - - ; asm: jmp block2x - jump block2 ; bin: eb 01 - - ; asm: block1x: -block1: - return ; bin: c3 - - ; asm: block2x: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notl %ecx - [-,%rcx] v5000 = bnot v1 ; bin: f7 d1 - jump block1 ; bin: eb fb - -} - -; Tests for i32/i8 conversion instructions. -function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - ; asm: movsbl %sil, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f be d6 - ; asm: movsbl %r10b, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. 
-function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - ; asm: movswl %si, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f bf d6 - ; asm: movswl %r10w, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i8 conversion instructions. -function %I64_I8() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbq %cl, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f be f1 - ; asm: movsbq %sil, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f be d6 - ; asm: movsbq %r10b, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i16 conversion instructions. -function %I64_I16() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswq %cx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f bf f1 - ; asm: movswq %si, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f bf d6 - ; asm: movswq %r10w, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i32 conversion instructions. -function %I64_I32() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i32 v1 ; bin: - [-,%rsi] v12 = ireduce.i32 v2 ; bin: - [-,%r10] v13 = ireduce.i32 v3 ; bin: - - ; asm: movslq %ecx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 63 f1 - ; asm: movslq %esi, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 63 d6 - ; asm: movslq %r10d, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 63 ca - - ; asm: movl %ecx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 89 ce - ; asm: movl %esi, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 41 89 f2 - ; asm: movl %r10d, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 44 89 d1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64 jump table instructions. -function %I64_JT(i64 [%rdi]) { - jt0 = jump_table [block1, block2, block3] - -block0(v0: i64 [%rdi]): - ; Note: The next two lines will need to change whenever instructions are - ; added or removed from this test. 
- [-, %rax] v1 = jump_table_base.i64 jt0 ; bin: 48 8d 05 00000039 PCRelRodata4(jt0) - [-, %r10] v2 = jump_table_base.i64 jt0 ; bin: 4c 8d 15 00000032 PCRelRodata4(jt0) - - [-, %rbx] v10 = iconst.i64 1 - [-, %r13] v11 = iconst.i64 2 - - [-, %rax] v20 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 48 63 04 98 - [-, %rax] v21 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 49 63 04 9a - [-, %rax] v22 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4a 63 04 a8 - [-, %rax] v23 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4b 63 04 aa - - [-, %r10] v30 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 4c 63 14 98 - [-, %r10] v31 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 4d 63 14 9a - [-, %r10] v32 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4e 63 14 a8 - [-, %r10] v33 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4f 63 14 aa - - fallthrough block10 - -block10: - indirect_jump_table_br v10, jt0 ; bin: ff e3 -block11: - indirect_jump_table_br v11, jt0 ; bin: 41 ff e5 - -block1: - fallthrough block2 -block2: - fallthrough block3 -block3: - trap user0 -} - -function %r12_r13_loads() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - - ;; Simple GPR load. - ; asm: movq (%r12), %rdx - [-,%rdx] v4 = load.i64 notrap v1 ; bin: 49 8b 14 24 - ; asm: movq (%r13), %rdx - [-,%rdx] v5 = load.i64 notrap v2 ; bin: 49 8b 55 00 - - ;; Load with disp8. - ; asm: movq 0x1(%r12), %rdx - [-,%rdx] v6 = load.i64 notrap v1+1 ; bin: 49 8b 54 24 01 - ; asm: movq 0x1(%r13), %rdx - [-,%rdx] v7 = load.i64 notrap v2+1 ; bin: 49 8b 55 01 - - ;; Load with disp32. - ; asm: movq 0x100(%r12), %rdx - [-,%rdx] v8 = load.i64 notrap v1+256 ; bin: 49 8b 94 24 00000100 - ; asm: movq 0x100(%r13), %rdx - [-,%rdx] v9 = load.i64 notrap v2+256 ; bin: 49 8b 95 00000100 - - ;; Load for base+index. - ; asm: movq (%r12, %rax, 1), %rdx - [-,%rdx] v10 = load_complex.i64 notrap v1+v3 ; bin: 49 8b 14 04 - ; asm: movq (%r13, %rax, 1), %rdx - [-,%rdx] v11 = load_complex.i64 notrap v2+v3 ; bin: 49 8b 54 05 00 - - ;; Now for FP values. - ; asm: movss (%r12), %xmm0 - [-,%xmm0] v12 = load.f32 notrap v1 ; bin: f3 41 0f 10 04 24 - ; asm: movss (%r13), %xmm0 - [-,%xmm0] v13 = load.f32 notrap v2 ; bin: f3 41 0f 10 45 00 - - ;; Load with disp8. - ; asm: movss 0x1(%r12), %xmm0 - [-,%xmm0] v14 = load.f32 notrap v1+1 ; bin: f3 41 0f 10 44 24 01 - ; asm: movss 0x1(%r13), %xmm0 - [-,%xmm0] v15 = load.f32 notrap v2+1 ; bin: f3 41 0f 10 45 01 - - ;; Load with disp32. - ; asm: movss 0x100(%r12), %xmm0 - [-,%xmm0] v16 = load.f32 notrap v1+256 ; bin: f3 41 0f 10 84 24 00000100 - ; asm: movss 0x100(%r13), %xmm0 - [-,%xmm0] v17 = load.f32 notrap v2+256 ; bin: f3 41 0f 10 85 00000100 - - ;; Load for base+index. - ; asm: movss (%r12, %rax, 1), %xmm0 - [-,%xmm0] v18 = load_complex.f32 notrap v1+v3 ; bin: f3 41 0f 10 04 04 - ; asm: movss (%r13, %rax, 1), %xmm0 - [-,%xmm0] v19 = load_complex.f32 notrap v2+v3 ; bin: f3 41 0f 10 44 05 00 - - return -} - -function %r12_r13_stores() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - [-,%xmm0] v4 = f32const 0x1.0 - - ;; Simple GPR load. 
- ; asm: movq %rax, (%r12) - store notrap v3, v1; bin: 49 89 04 24 - ; asm: movq (%r13), %rdx - store notrap v3, v2; bin: 49 89 45 00 - - ; asm: movq %rax, 0x1(%r12) - store notrap v3, v1+1; bin: 49 89 44 24 01 - ; asm: movq %rax, 0x1(%r13) - store notrap v3, v2+1; bin: 49 89 45 01 - - ; asm: movq %rax, 0x100(%r12) - store notrap v3, v1+256; bin: 49 89 84 24 00000100 - ; asm: movq %rax, 0x100(%r13) - store notrap v3, v2+256; bin: 49 89 85 00000100 - - ; asm: movq %rax, (%r12, %rax, 1) - store_complex notrap v3, v1+v3; bin: 49 89 04 04 - ; asm: movq %rax, (%r13, %rax, 1) - store_complex notrap v3, v2+v3; bin: 49 89 44 05 00 - - ; asm: movb %al, (%r12) - istore8 notrap v3, v1; bin: 41 88 04 24 - ; asm: movb %al, (%r13) - istore8 notrap v3, v2; bin: 41 88 45 00 - - ; asm: movb %al, 0x1(%r12) - istore8 notrap v3, v1+1; bin: 41 88 44 24 01 - ; asm: movb %al, 0x1(%r13) - istore8 notrap v3, v2+1; bin: 41 88 45 01 - - ; asm: movb %al, 0x100(%r12) - istore8 notrap v3, v1+256; bin: 41 88 84 24 00000100 - ; asm: movb %al, 0x100(%r13) - istore8 notrap v3, v2+256; bin: 41 88 85 00000100 - - ; asm: movb %al, (%r12, %rax, 1) - istore8_complex notrap v3, v1+v3; bin: 41 88 04 04 - ; asm: movb %al, (%r13, %rax, 1) - istore8_complex notrap v3, v2+v3; bin: 41 88 44 05 00 - - ; asm: movss %xmm0, (%r12) - store notrap v4, v1; bin: f3 41 0f 11 04 24 - ; asm: movss %xmm0, (%r13) - store notrap v4, v2; bin: f3 41 0f 11 45 00 - - ; asm: movss %xmm0, 0x1(%r12) - store notrap v4, v1+1; bin: f3 41 0f 11 44 24 01 - ; asm: movss %xmm0, 0x1(%r13) - store notrap v4, v2+1; bin: f3 41 0f 11 45 01 - - ; asm: movss %xmm0, 0x100(%r12) - store notrap v4, v1+256; bin: f3 41 0f 11 84 24 00000100 - ; asm: movss %xmm0, 0x100(%r13) - store notrap v4, v2+256; bin: f3 41 0f 11 85 00000100 - - ; asm: movss %xmm0, (%r12, %rax, 1) - store_complex notrap v4, v1+v3; bin: f3 41 0f 11 04 04 - ; asm: movss %xmm0, (%r13, %rax, 1) - store_complex notrap v4, v2+v3; bin: f3 41 0f 11 44 05 00 - - return -} - -function %B64() { -block0: - [-, %rax] v1 = bconst.b64 true ; bin: 40 b8 00000001 - [-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001 - return -} - -function %V128() { -block0: - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33) - store v4, v3 ; bin: heap_oob 45 0f 11 0a - - [-, %r11] v5 = iconst.i64 0x1234 - [-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif deleted file mode 100644 index fccc691aa3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/br-i128.clif +++ /dev/null @@ -1,42 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i8 fast { -block0(v0: i128): - brz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: v5 = icmp_imm eq v3, 0 - ; nextln: v6 = icmp_imm eq v4, 0 - ; nextln: v7 = band v5, v6 - ; nextln: brnz v7, block2 - jump block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} - -function u0:1(i128) -> i8 fast { -block0(v0: i128): - brnz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: brnz v3, block2 - ; nextln: fallthrough block3 - - ; check: block3: - ; nextln: brnz.i64 v4, block2 - jump block1 - ; nextln: fallthrough block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif 
b/cranelift/filetests/filetests/isa/x86/brz-i8.clif deleted file mode 100644 index fda005bc81..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-i8.clif +++ /dev/null @@ -1,38 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 true - return v1 - -block2: - v2 = bconst.b1 false - return v2 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brnz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brnz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 false - return v1 - -block2: - v2 = bconst.b1 true - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif deleted file mode 100644 index eb537d7c1a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v6 = fill v0 - ; nextln: v3 = icmp_imm eq v6, 0 - ; nextln: v7 = fill v1 - ; nextln: v4 = icmp_imm eq v7, 0 - ; nextln: v5 = band v3, v4 - ; nextln: brnz v5, block1 - brz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} - -function u0:1(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v3 = fill v0 - ; nextln: brnz v3, block1 - ; nextln: fallthrough block3 - ; check: block3: - ; nextln: v4 = fill.i32 v1 - ; nextln: brnz v4, block1 - brnz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif deleted file mode 100644 index e7da3f0387..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i128.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = uextend.i128 v0 - ; nextln: v7 = iconst.i64 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = sextend.i128 v0 - ; nextln: v8 = copy v0 - ; nextln: v7 = sshr_imm v8, 63 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64.clif b/cranelift/filetests/filetests/isa/x86/extend-i64.clif deleted file mode 100644 index a3d892c488..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i64.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target i686 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i32 0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = uextend.i64 v0 - ; nextln: v7 = iconst.i32 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i32 
0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = sextend.i64 v0 - ; nextln: v10 = copy v0 - ; nextln: v7 = sshr_imm v10, 31 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0xffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif deleted file mode 100644 index 3bc9adf5bc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif +++ /dev/null @@ -1,17 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. -test binemit -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif deleted file mode 100644 index 6fff51c7b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif +++ /dev/null @@ -1,31 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif deleted file mode 100644 index eda7b6dffd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> i128 system_v { -block0: - v0 = iconst.i64 0 - v1 = iconst.i64 0 - v2 = iconcat v0, v1 - jump block5 - -block2: - jump block4(v27) - -block4(v23: i128): - return v23 - -block5: - v27 = bxor.i128 v2, v2 - v32 = iconst.i32 0 - brz v32, block2 - jump block6 - -block6: - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif deleted file mode 100644 index b171c0ccfd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128.clif +++ /dev/null @@ -1,46 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 fast { -block0(v0: i64, v1: i64): -;check: block0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): - - v2 = iconcat.i64 v0, v1 - ; check: regmove v0, %rdi -> %rax - ; check: regmove v1, %rsi -> %rdx - - return v2 - ; check: v4 = x86_pop.i64 - ; check: return v0, v1, v4 -} - -function u0:1(i128) -> i64, i64 fast { -block0(v0: i128): -; check: block0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): - - v1, v2 = isplit v0 - ; check: regmove v3, %rdi -> %rax - ; check: regmove v4, %rsi -> %rdx 
- - return v1, v2 - ; check: v6 = x86_pop.i64 - ; check: return v3, v4, v6 -} - -function u0:2(i64, i128) fast { -; check: block0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): -block0(v0: i64, v1: i128): - ; check: store v2, v0+8 - ; check: store v3, v0+16 - store v1, v0+8 - return -} - -function u0:3(i64) -> i128 fast { -block0(v0: i64): - ; check: v2 = load.i64 v0+8 - ; check: v3 = load.i64 v0+16 - v1 = load.i128 v0+8 - ; check: return v2, v3, v5 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif deleted file mode 100644 index dd75cac4a1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> i8 fast { -block0(v0: i16): - v1 = ireduce.i8 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif deleted file mode 100644 index 9aedb61001..0000000000 --- a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif +++ /dev/null @@ -1,20 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 system_v { -block0(v0: i64, v1: i64): - trap user0 - -block30: - v245 = iconst.i64 0 - v246 = iconcat v245, v245 - ; The next instruction used to be legalized twice, causing a panic the second time. - v250, v251 = isplit.i128 v370 - v252, v253 = isplit v246 - trap user0 - -block45: - v369 = iconst.i64 0 - v370 = load.i128 v369 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif deleted file mode 100644 index 948fa34d99..0000000000 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = iconst.i8 0 - v2 = isub v1, v0 - ; check: uextend.i32 - ; nextln: iconst.i32 - ; nextln: isub - ; nextln: ireduce.i8 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif deleted file mode 100644 index a08356ca53..0000000000 --- a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) system_v { -block0(v0: i128): - jump block1(v0) - -block1(v1: i128): - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif deleted file mode 100644 index 9d88db9d17..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0() -> i8 fast { -block0: - v14 = bconst.b1 false - v15 = bint.i8 v14 - return v15 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif deleted file mode 100644 index acdd21c9f0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:51(i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - -block0(v0: i64, v1: i64): - v2 = stack_addr.i64 ss1 - v3 = load.i8 v1 - 
store v3, v2 - v4 = stack_addr.i64 ss2 - v5 = stack_addr.i64 ss3 - jump block1 - -block1: - v6 = load.i8 v2 - store v6, v5 - v7 = load.i8 v5 - v8 = bnot v7 - store v8, v4 - v9 = load.i8 v4 - store v9, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif deleted file mode 100644 index f64108531c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif +++ /dev/null @@ -1,46 +0,0 @@ -test legalizer - -target x86_64 legacy - -function %br_icmp(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1 - jump block1 - -block1: - return -} - -; sameln: function %br_icmp(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v2 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v2, block1 -; nextln: [Op1jmpb#eb] jump block1 -; nextln: -; nextln: block1: -; nextln: [Op1ret#c3] return -; nextln: } - - -function %br_icmp_args(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1(v0) - jump block1(v0) - -block1(v2: i64): - return -} - -; sameln: function %br_icmp_args(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v3 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v3, block1(v0) -; nextln: [Op1jmpb#eb] jump block1(v0) -; nextln: -; nextln: block1(v2: i64): -; nextln: [Op1ret#c3] return -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif deleted file mode 100644 index c931d6cacf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy -; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64) system_v { - ss0 = explicit_slot 1 - jt0 = jump_table [block1] - -block0(v0: i64): - v1 = stack_addr.i64 ss0 - v2 = load.i8 v1 - br_table v2, block2, jt0 -; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 -; block2 is replaced by block1 by fold_redundant_jump -; nextln: brif uge $oob, block1 -; nextln: fallthrough $(inb=$BB) -; check: $inb: -; nextln: $(final_idx=$V) = uextend.i64 $idx -; nextln: $(base=$V) = jump_table_base.i64 jt0 -; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 -; nextln: $(addr=$V) = iadd $base, $rel_addr -; nextln: indirect_jump_table_br $addr, jt0 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif deleted file mode 100644 index 7c135d54ae..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) fast { -fn0 = %black_box(i8) -ss0 = explicit_slot 1 ; black box - -block0(v0: i8, v1: i8): - v99 = stack_addr.i64 ss0 - - ; check: istore8 $(V), $(V) - - v2 = band v0, v1 - store v2, v99 - v3 = bor v0, v1 - store v3, v99 - v4 = bxor v0, v1 - store v4, v99 - v5 = bnot v0 - store v5, v99 - v6 = band_not v0, v1 - store v6, v99 - v7 = bor_not v0, v1 - store v7, v99 - v8 = bxor_not v0, v1 - store v8, v99 - v9 = band_imm v0, 42 - store v9, v99 - v10 = bor_imm v0, 42 - store v10, v99 - v11 = bxor_imm v0, 42 - store v11, v99 - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif deleted file mode 100644 index b21099281e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-call.clif +++ /dev/null @@ -1,14 +0,0 @@ -; Test legalization of a non-colocated call in 64-bit non-PIC mode. -test legalizer -set opt_level=speed_and_size -target x86_64 legacy haswell - -function %call() { - fn0 = %foo() -block0: - call fn0() - return -} - -; check: v0 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v0() diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif deleted file mode 100644 index af5e158b07..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8) -> i8, i8 fast { -block0(v0: i8): - v1 = clz v0 - ; check: v3 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 -1 - ; nextln: v7 = iconst.i32 31 - ; nextln: v8, v9 = x86_bsr v3 - ; nextln: v10 = selectif.i32 eq v9, v6, v8 - ; nextln: v4 = isub v7, v10 - ; nextln: v5 = iadd_imm v4, -24 - ; nextln: v1 = ireduce.i8 v5 - v2 = ctz v0 - ; nextln: v11 = uextend.i32 v0 - ; nextln: v12 = bor_imm v11, 256 - ; nextln: v14 = iconst.i32 32 - ; nextln: v15, v16 = x86_bsf v12 - ; nextln: v13 = selectif.i32 eq v16, v14, v15 - ; nextln: v2 = ireduce.i8 v13 - return v1, v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif deleted file mode 100644 index 0c51e064dd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif +++ /dev/null @@ -1,133 +0,0 @@ -; Test the custom legalizations. 
-test legalizer -target i686 legacy -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %cond_trap(i32) { -block0(v1: i32): - trapz v1, user67 - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $f, user67 - ; nextln: return -} - -function %cond_trap2(i32) { -block0(v1: i32): - trapnz v1, int_ovf - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif ne $f, int_ovf - ; nextln: return -} - -function %cond_trap_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapz v2, user7 - return - ; check: block0(v1: i32 - ; check: brnz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user7 - ; check: $new: - ; nextln: return -} - -function %cond_trap2_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapnz v2, user9 - return - ; check: block0(v1: i32 - ; check: brz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user9 - ; check: $new: - ; nextln: return -} - -function %f32const() -> f32 { -block0: - v1 = f32const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i32 - ; check: v1 = bitcast.f32 $tmp - return v1 -} - -function %select_f64(f64, f64, i32) -> f64 { -block0(v0: f64, v1: f64, v2: i32): - v3 = select v2, v0, v1 - ; check: brnz v2, $(new=$BB)(v0) - ; nextln: jump $new(v1) - ; check: $new(v3: f64): - ; nextln: return v3 - return v3 -} - -function %f32_min(f32, f32) -> f32 { -block0(v0: f32, v1: f32): - v2 = fmin v0, v1 - return v2 - ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 - ; nextln: jump $(done=$BB)($vnat) - - ; check: $(uno=$BB): - ; nextln: $(vuno=$V) = fadd.f32 v0, v1 - ; nextln: jump $(done=$BB)($vuno) - - ; check: $(ueq=$BB): - ; check: $(veq=$V) = bor.f32 v0, v1 - ; nextln: jump $(done=$BB)($veq) - - ; check: $done(v2: f32): - ; nextln: return v2 -} - -function %ineg_legalized_i8() { -block0: - v0 = iconst.i8 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i8 v2 - ; nextln: v3 = iconst.i8 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i8 v6 - - return -} - -function %ineg_legalized_i16() { -block0: - v0 = iconst.i16 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i16 v2 - ; nextln: v3 = iconst.i16 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i16 v6 - - return -} - -function %ineg_legalized_i32() { -block0: - v0 = iconst.i32 1 - v1 = ineg v0 - ; check: v0 = iconst.i32 1 - ; nextln: v2 = iconst.i32 0 - ; nextln: v1 = isub v2, v0 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif deleted file mode 100644 index 9e579c1bcd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif +++ /dev/null @@ -1,192 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div.clif. 
-set avoid_div_traps=1 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %sdiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %sdiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = sdiv v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. 
-; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. -function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %srem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %srem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif deleted file mode 100644 index b172a9aef3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div.clif +++ /dev/null @@ -1,57 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div-traps.clif. -set avoid_div_traps=0 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = sdiv v0, v1 - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. -; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. 
-function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif deleted file mode 100644 index 43f57f8372..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the legalization of f64const. -test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f64const() -> f64 { -block0: - v1 = f64const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i64 - ; check: v1 = bitcast.f64 $tmp - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif deleted file mode 100644 index 32a256c9e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_uint.f64 v0 - return v1 -} - -function u0:1(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_sint.f64 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif deleted file mode 100644 index 242a0f8dfa..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif +++ /dev/null @@ -1,123 +0,0 @@ -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; Test legalization for various forms of heap addresses. 
-; regex: BB=block\d+ - -function %heap_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 64 - gv1 = iadd_imm.i64 gv4, 72 - gv2 = iadd_imm.i64 gv4, 80 - gv3 = load.i64 notrap aligned gv4+88 - - heap0 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 - heap1 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 - heap2 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i64 - heap3 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i64 - heap4 = dynamic gv1, min 0x1_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - heap5 = dynamic gv1, bound gv3, offset_guard 0x1000, index_type i32 - heap6 = dynamic gv1, min 0x1_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - heap7 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i64 - - ; check: heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - ; check: heap1 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i32 - ; check: heap2 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i64 - ; check: heap3 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i64 - ; check: heap4 = dynamic gv1, min 0x0001_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - ; check: heap5 = dynamic gv1, min 0, bound gv3, offset_guard 4096, index_type i32 - ; check: heap6 = dynamic gv1, min 0x0001_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - ; check: heap7 = dynamic gv1, min 0, bound gv2, offset_guard 4096, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - ; The fast-path; 32-bit index, static heap with a sufficient bound, no bounds check needed! 
- v4 = heap_addr.i64 heap0, v0, 0 - ; check: v12 = uextend.i64 v0 - ; check: v13 = iadd_imm v3, 64 - ; check: v4 = iadd v13, v12 - - v5 = heap_addr.i64 heap1, v0, 0 - ; check: v14 = uextend.i64 v0 - ; check: v15 = icmp_imm ugt v14, 0x0001_0000 - ; check: brz v15, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: trap heap_oob - ; check: $resume_1: - ; check: v16 = iadd_imm.i64 v3, 64 - ; check: v5 = iadd v16, v14 - - v6 = heap_addr.i64 heap2, v1, 0 - ; check: v19 = iconst.i64 0x0001_0000_0000 - ; check: v17 = icmp.i64 ugt v1, v19 - ; check: brz v17, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap heap_oob - ; check: $resume_2: - ; check: v18 = iadd_imm.i64 v3, 64 - ; check: v6 = iadd v18, v1 - - v7 = heap_addr.i64 heap3, v1, 0 - ; check: v20 = icmp_imm.i64 ugt v1, 0x0001_0000 - ; check: brz v20, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap heap_oob - ; check: $resume_3: - ; check: v21 = iadd_imm.i64 v3, 64 - ; check: v7 = iadd v21, v1 - - v8 = heap_addr.i64 heap4, v0, 0 - ; check: v22 = uextend.i64 v0 - ; check: v23 = load.i64 notrap aligned v3+88 - ; check: v24 = iadd_imm v23, 0 - ; check: v25 = icmp ugt v22, v24 - ; check: brz v25, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap heap_oob - ; check: $resume_4: - ; check: v26 = iadd_imm.i64 v3, 72 - ; check: v8 = iadd v26, v22 - - v9 = heap_addr.i64 heap5, v0, 0 - ; check: v27 = uextend.i64 v0 - ; check: v28 = load.i64 notrap aligned v3+88 - ; check: v29 = iadd_imm v28, 0 - ; check: v30 = icmp ugt v27, v29 - ; check: brz v30, $(resume_5=$BB) - ; nextln: jump $(trap_5=$BB) - ; check: $trap_5: - ; nextln: trap heap_oob - ; check: $resume_5: - ; check: v31 = iadd_imm.i64 v3, 72 - ; check: v9 = iadd v31, v27 - - v10 = heap_addr.i64 heap6, v1, 0 - ; check: v32 = iadd_imm.i64 v3, 80 - ; check: v33 = iadd_imm v32, 0 - ; check: v34 = icmp.i64 ugt v1, v33 - ; check: brz v34, $(resume_6=$BB) - ; nextln: jump $(trap_6=$BB) - ; check: $trap_6: - ; nextln: trap heap_oob - ; check: $resume_6: - ; check: v35 = iadd_imm.i64 v3, 72 - ; check: v10 = iadd v35, v1 - - v11 = heap_addr.i64 heap7, v1, 0 - ; check: v36 = iadd_imm.i64 v3, 80 - ; check: v37 = iadd_imm v36, 0 - ; check: v38 = icmp.i64 ugt v1, v37 - ; check: brz v38, $(resume_7=$BB) - ; nextln: jump $(trap_7=$BB) - ; check: $trap_7: - ; nextln: trap heap_oob - ; check: $resume_7: - ; check: v39 = iadd_imm.i64 v3, 72 - ; check: v11 = iadd v39, v1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif deleted file mode 100644 index 276de82d4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of i128 instructions on x86_64. 
-test legalizer -target x86_64 legacy haswell - -; regex: V=v\d+ - -function %imul(i128, i128) -> i128 { -block0(v1: i128, v2: i128): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif deleted file mode 100644 index 7e2d381947..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif +++ /dev/null @@ -1,357 +0,0 @@ -; Test the legalization of i64 instructions on x86_32. -test legalizer -target i686 legacy haswell - -; regex: V=v\d+ - -function %iadd(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = iadd v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %isub(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = isub v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %imul(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %icmp_eq(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp eq v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_eq(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm eq v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_ne(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ne v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_ne(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm 
ne v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_sgt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sgt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sgt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sgt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_slt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp slt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_slt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm slt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sle(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sle 
v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sle(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sle v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ugt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ugt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ugt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ugt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_uge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp uge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_uge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm uge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ult(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ult v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ult(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ult v1, 0 - ; check: 
$(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ule(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ule v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ule(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ule v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v3 = iconst.i32 0 - ; nextln: v0 = iconcat v2, v3 - ; nextln: v5 = iconst.i32 0 - ; nextln: v6 = iconst.i32 0 - ; nextln: v4 = iconcat v5, v6 - ; nextln: v7, v8 = isub_ifbout v5, v2 - ; nextln: v9 = isub_ifbin v6, v3, v8 - ; nextln: v1 = iconcat v7, v9 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif deleted file mode 100644 index 32f2b3d3e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = icmp_imm sle v0, 0 - ; check: $(e1=$V) = sextend.i32 v0 - ; nextln: v2 = icmp_imm sle $e1, 0 - v3 = bint.i8 v2 - v4 = icmp eq v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; nextln: $(e3=$V) = uextend.i32 v1 - ; nextln: v4 = icmp eq $e2, $e3 - v5 = bint.i8 v4 - v6 = iadd v3, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif deleted file mode 100644 index 1e6a70434a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif +++ /dev/null @@ -1,18 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64) system_v { - ss0 = explicit_slot 0 - -block0(v0: i64): - jump block1 - -block1: -; _0 = const 42u8 - v1 = iconst.i8 42 - store v1, v0 -; -; return - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif deleted file mode 100644 index b1f5b12095..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - -block0(v0: i64, v1: i8, v2: i8): - v11 = imul v1, v2 - store v11, v0 - return -} 
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif deleted file mode 100644 index 4f84d93d0b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8) system_v { - ss0 = explicit_slot 1 - -block0(v0: i64, v1: i8): - v3 = stack_addr.i64 ss0 - v5 = load.i8 v3 - v6 = iconst.i8 2 - v7 = imul_imm v5, 42 - store v7, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif deleted file mode 100644 index a36a2d6ed0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the custom legalization of ineg.i64 on x86_64. -test legalizer -target x86_64 legacy - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v0 = iconst.i64 1 - ; nextln: v2 = iconst.i64 0 - ; nextln: v1 = isub v2, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif deleted file mode 100644 index 527710d4fe..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i64 v2 - ; check: v3 = copy v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif deleted file mode 100644 index 3ad3f4c69f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - v3 = ireduce.i32 v2 - ; check: v3 = fill v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif deleted file mode 100644 index 0d042bf3ff..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i64, i64 fast { -; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): -block0(v0: i128): - jump block2 - -block1: - ; When this `isplit` is legalized, the bnot below is not yet legalized, - ; so there isn't a corresponding `iconcat` yet. We should try legalization - ; for this `isplit` again once all instrucions have been legalized. - v2, v3 = isplit.i128 v1 - ; return v6, v7 - return v2, v3 - -block2: - ; check: v6 = bnot.i64 v4 - ; check: v2 -> v6 - ; check: v7 = bnot.i64 v5 - ; check: v3 -> v7 - v1 = bnot.i128 v0 - jump block1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif deleted file mode 100644 index 838a915bf0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif +++ /dev/null @@ -1,15 +0,0 @@ -test legalizer - -; Pre-SSE 4.1, we need to use runtime library calls for floating point rounding operations. 
-set is_pic -target x86_64 legacy - -function %floor(f32) -> f32 { -block0(v0: f32): - v1 = floor v0 - return v1 -} -; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] fast { -; check: sig0 = (f32 [%xmm0]) -> f32 [%xmm0] system_v -; check: fn0 = %FloorF32 sig0 -; check: v1 = call fn0(v0) diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif deleted file mode 100644 index 4cbf3e088e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - ss4 = explicit_slot 1 - -block0(v0: i64, v1: i8, v2: i8): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i8 v3 - store v7, v5 - v8 = load.i8 v4 - store v8, v6 - v9 = load.i8 v5 - v10 = load.i8 v6 - v11 = imul v9, v10 - store v11, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif deleted file mode 100644 index 11a0f1d20f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif +++ /dev/null @@ -1,115 +0,0 @@ -; Test the legalization of memory objects. -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %vmctx(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, -16 - -block1(v1: i64): - v2 = global_value.i64 gv1 - ; check: v2 = iadd_imm v1, -16 - return v2 - ; check: return v2 -} - -function %load(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0-16 - gv2 = iadd_imm.i64 gv1, 32 - -block1(v1: i64): - v2 = global_value.i64 gv2 - ; check: $(p1=$V) = load.i64 notrap aligned v1-16 - ; check: v2 = iadd_imm $p1, 32 - return v2 - ; check: return v2 -} - -function %symbol() -> i64 { - gv0 = symbol %something - gv1 = symbol u123:456 - -block1: - v0 = global_value.i64 gv0 - ; check: v0 = symbol_value.i64 gv0 - v1 = global_value.i64 gv1 - ; check: v1 = symbol_value.i64 gv1 - v2 = bxor v0, v1 - return v2 -} - -; SpiderMonkey VM-style static 4+2 GB heap. -; This eliminates bounds checks completely for offsets < 2GB. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 1 - ; Boundscheck should be eliminated. - ; Checks here are assuming that no pipehole opts fold the load offsets. 
- ; nextln: $(xoff=$V) = uextend.i64 v0 - ; check: $(hbase=$V) = iadd_imm v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+16 - ; nextln: v2 = load.f32 v1+16 - v3 = load.f32 v1+20 - ; nextln: v3 = load.f32 v1+20 - v4 = fadd v2, v3 - return v4 -} - -function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; Everything after the obviously OOB access should be eliminated, leaving - ; the `trap heap_oob` instruction as the terminator of the block and moving - ; the remainder of the instructions into an inaccessible block. - ; check: block0( - ; nextln: trap heap_oob - ; check: block1: - ; nextln: v1 = iconst.i64 0 - ; nextln: v2 = load.f32 v1+16 - ; nextln: return v2 - ; nextln: } - v1 = heap_addr.i64 heap0, v0, 0x1000_0001 - v2 = load.f32 v1+16 - return v2 -} - - -; SpiderMonkey VM-style static 4+2 GB heap. -; Offsets >= 2 GB do require a boundscheck. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 0x8000_0000 - ; Boundscheck code - ; check: $(xoff=$V) = uextend.i64 v0 - ; check: $(oob=$V) = icmp - ; nextln: brz $oob, $(ok=$BB) - ; nextln: jump $(trap_oob=$BB) - ; check: $trap_oob: - ; nextln: trap heap_oob - ; check: $ok: - ; Checks here are assuming that no pipehole opts fold the load offsets. - ; check: $(hbase=$V) = iadd_imm.i64 v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+0x7fff_ffff - ; nextln: v2 = load.f32 v1+0x7fff_ffff - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif deleted file mode 100644 index 179ef824f3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif +++ /dev/null @@ -1,43 +0,0 @@ -test compile -target x86_64 legacy baseline - -; umulhi/smulhi on 64 bit operands - -function %i64_umulhi(i64, i64) -> i64 { -block0(v10: i64, v11: i64): - v12 = umulhi v10, v11 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v12 -} - -function %i64_smulhi(i64, i64) -> i64 { -block0(v20: i64, v21: i64): - v22 = smulhi v20, v21 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v22 -} - - -; umulhi/smulhi on 32 bit operands - -function %i32_umulhi(i32, i32) -> i32 { -block0(v30: i32, v31: i32): - v32 = umulhi v30, v31 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v32 -} - -function %i32_smulhi(i32, i32) -> i32 { -block0(v40: i32, v41: i32): - v42 = smulhi v40, v41 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v42 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif deleted file mode 100644 index fb9c4f49b8..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif +++ /dev/null @@ -1,9 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = popcnt v0 - ; check-not: sextend.i32 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif deleted file mode 100644 index 
f770ba5643..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 8 - ss2 = explicit_slot 8 - ss3 = explicit_slot 2 - ss4 = explicit_slot 8 - sig0 = (i64, i16, i64) system_v - fn0 = colocated u0:11 sig0 - -block0(v0: i64, v1: i64, v2: i64): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i64 v3 - v8 = load.i16 v7 - store v8, v5 - v9 = load.i64 v4 - store v9, v6 - v10 = load.i16 v5 - v11 = load.i64 v6 - call fn0(v0, v10, v11) - jump block2 - -block2: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif deleted file mode 100644 index e058602615..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif +++ /dev/null @@ -1,35 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ -; regex: R=%[a-z0-9]+ - -function %i32_rotr(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotr v0, v1 - v2 = rotr v0, v1 - return v2 -} - -function %i32_rotr_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotr_imm v0, 1 - v2 = rotr_imm v0, 1 - return v2 -} - -function %i32_rotl(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotl v0, v1 - v2 = rotl v0, v1 - return v2 -} - -function %i32_rotl_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotl_imm v0, 1 - v2 = rotl_imm v0, 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif deleted file mode 100644 index 9759a8b155..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = ishl v0, v1 - ; check: $(e1=$V) = uextend.i32 v0 - ; check: $(r1=$V) = ishl $e1, v1 - ; check v2 = ireduce.i8 $r1 - v3 = ushr v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; check: $(r2=$V) = ushr $e2, v1 - ; check v2 = ireduce.i8 $r2 - v4 = sshr v0, v1 - ; check: $(e3=$V) = sextend.i32 v0 - ; check: $(r3=$V) = sshr $e3, v1 - ; check v2 = ireduce.i8 $r3 - - v5 = iadd v2, v3 - v6 = iadd v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif deleted file mode 100644 index 10912afe76..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif +++ /dev/null @@ -1,73 +0,0 @@ -test legalizer -target x86_64 legacy - -; Test legalization for various forms of table addresses. 
-; regex: BB=block\d+ - -function %table_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 72 - gv1 = iadd_imm.i64 gv4, 80 - gv2 = load.i32 notrap aligned gv4+88 - - table0 = dynamic gv0, min 0x1_0000, bound gv2, element_size 1, index_type i32 - table1 = dynamic gv0, bound gv2, element_size 16, index_type i32 - table2 = dynamic gv0, min 0x1_0000, bound gv1, element_size 1, index_type i64 - table3 = dynamic gv0, bound gv1, element_size 16, index_type i64 - - ; check: table0 = dynamic gv0, min 0x0001_0000, bound gv2, element_size 1, index_type i32 - ; check: table1 = dynamic gv0, min 0, bound gv2, element_size 16, index_type i32 - ; check: table2 = dynamic gv0, min 0x0001_0000, bound gv1, element_size 1, index_type i64 - ; check: table3 = dynamic gv0, min 0, bound gv1, element_size 16, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - v4 = table_addr.i64 table0, v0, +0 - ; check: v8 = load.i32 notrap aligned v3+88 - ; check: v9 = icmp uge v0, v8 - ; check: brz v9, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: trap table_oob - ; check: $resume_1: - ; check: v10 = uextend.i64 v0 - ; check: v11 = iadd_imm.i64 v3, 72 - ; check: v4 = iadd v11, v10 - - v5 = table_addr.i64 table1, v0, +0 - ; check: v12 = load.i32 notrap aligned v3+88 - ; check: v13 = icmp.i32 uge v0, v12 - ; check: brz v13, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap table_oob - ; check: $resume_2: - ; check: v14 = uextend.i64 v0 - ; check: v15 = iadd_imm.i64 v3, 72 - ; check: v16 = ishl_imm v14, 4 - ; check: v5 = iadd v15, v16 - - v6 = table_addr.i64 table2, v1, +0 - ; check: v17 = iadd_imm.i64 v3, 80 - ; check: v18 = icmp.i64 uge v1, v17 - ; check: brz v18, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap table_oob - ; check: $resume_3: - ; check: v19 = iadd_imm.i64 v3, 72 - ; check: v6 = iadd v19, v1 - - v7 = table_addr.i64 table3, v1, +0 - ; check: v20 = iadd_imm.i64 v3, 80 - ; check: v21 = icmp.i64 uge v1, v20 - ; check: brz v21, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap table_oob - ; check: $resume_4: - ; check: v22 = iadd_imm.i64 v3, 72 - ; check: v23 = ishl_imm.i64 v1, 4 - ; check: v7 = iadd v22, v23 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif deleted file mode 100644 index 7be308308c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = urem v0, v1 - ; check: $(a=$V) = uextend.i32 v0 - ; nextln: $(b=$V) = uextend.i32 v1 - ; nextln: $(c=$V) = iconst.i32 0 - ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b - ; nextln: v2 = ireduce.i8 $r - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif deleted file mode 100644 index ff5d11a4d7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif +++ /dev/null @@ -1,51 +0,0 @@ -test compile -set enable_simd -target i686 legacy haswell - -function u0:1(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:2(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = 
load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:3(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:4(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:5(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:6(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif deleted file mode 100644 index 4e0af65c9f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i64, i32) system_v { -block0(v0: i64, v1: i32): - v2 = bor v0, v0 - store v2, v1 - return -} - -function u0:1(i32) -> i64 system_v { -block0(v1: i32): - v0 = load.i64 v1 - v2 = bor v0, v0 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif deleted file mode 100644 index cafa90eb4f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/nop.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile - -target x86_64 legacy - -function %test(i32) -> i32 system_v { -block0(v0: i32): - nop - v1 = iconst.i32 42 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif deleted file mode 100644 index b5a9658b67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ /dev/null @@ -1,52 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} - -function %zero_dword() -> i32 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. - ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. 
- ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif deleted file mode 100644 index 8e469b8b7a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif +++ /dev/null @@ -1,72 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} - -function %imm_zero_register() -> i64 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff - ; asm: xor %r8, r8 - [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 - ; asm: xor %r15, %r15 - [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. - ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. - ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %r8d, %r8d - [-,%r15] v0 = iconst.i8 0 ; bin: 45 31 ff - ; asm: xor %eax, eax - [-,%rax] v1 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v2 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif deleted file mode 100644 index b9bc230c33..0000000000 --- a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile - -set enable_pinned_reg=true -set use_pinned_reg_as_heap_base=true -set opt_level=speed_and_size - -target x86_64 legacy - -; regex: V=v\d+ - -; r15 is the pinned heap register. It must not be rewritten, so it must not be -; used as a tied output register. -function %tied_input() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 - -;; It musn't be used even if this is a tied input used twice. 
-function %tied_twice() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd v1, v1 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd v1, v1 - -function %uses() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - v3 = get_pinned_reg.i64 - v4 = iadd v2, v3 - return v4 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 -; nextln: ,%r15 -; sameln: v3 = get_pinned_reg.i64 -; nextln: ,%rax] -; sameln: iadd v2, v3 - -; When the pinned register is used as the heap base, the final load instruction -; must use the %r15 register, since x86 implements the complex addressing mode. -function u0:1(i64 vmctx) -> i64 system_v { - gv0 = vmctx - heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - -block0(v42: i64): - v5 = iconst.i32 42 - v6 = heap_addr.i64 heap0, v5, 0 - v7 = load.i64 v6 - return v7 -} - -; check: ,%r15] -; sameln: $(heap_base=$V) = get_pinned_reg.i64 -; nextln: load_complex.i64 $heap_base+ diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif deleted file mode 100644 index 4b4a05244c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_func_adjusts_sp=1 -target x86_64 legacy - -; Like %big in probestack.clif, but with the probestack function adjusting -; the stack pointer itself. - -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [Op1call_id#e8] call fn0(v1) -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif deleted file mode 100644 index 6b9b4f3342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set enable_probestack=0 -target x86_64 legacy - -; Like %big in probestack.clif, but with probes disabled. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif deleted file mode 100644 index 2837ddd0c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif +++ /dev/null @@ -1,27 +0,0 @@ -test compile -target x86_64 legacy - -; Like %big in probestack.clif, but without a colocated libcall. - -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0 -; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64 -; nextln: [Op1ret#c3] return v4 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif deleted file mode 100644 index efb1900170..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-size.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_size_log2=13 -target x86_64 legacy - -; Like %big in probestack.clif, but now the probestack size is bigger -; and it no longer needs a probe. - -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} - -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %big; still doesn't need a probe. 
- -function %bigger() system_v { - ss0 = explicit_slot 8192 -block0: - return -} - -; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8192, offset -8208 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %bigger; this needs a probe. - -function %biggest() system_v { - ss0 = explicit_slot 8193 -block0: - return -} - -; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8193, offset -8209 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif deleted file mode 100644 index c434cf5f63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack.clif +++ /dev/null @@ -1,49 +0,0 @@ -test compile -set use_colocated_libcalls=1 -target x86_64 legacy - -; A function with a big stack frame. This should have a stack probe. - -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } - - -; A function with a small enough stack frame. This shouldn't have a stack probe. 
- -function %small() system_v { - ss0 = explicit_slot 4096 -block0: - return -} - -; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4096, offset -4112 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif deleted file mode 100644 index 831928186b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif +++ /dev/null @@ -1,314 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -set enable_probestack=false -target x86_64 legacy haswell - -; An empty function. - -function %empty() { -block0: - return -} - -; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function with a single stack slot. - -function %one_stack_slot() { - ss0 = explicit_slot 168 -block0: - return -} - -; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function performing a call. - -function %call() { - fn0 = %foo() - -block0: - call fn0() - return -} - -; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: sig0 = () fast -; nextln: fn0 = %foo sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: call fn0() -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function that uses a lot of registers but doesn't quite need to spill. 
- -function %no_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} - -; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; nextln: ss0 = incoming_arg 56, offset -56 -; nextln: -; nextln: block0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]): -; nextln: x86_push v15 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v16 -; nextln: x86_push v17 -; nextln: x86_push v18 -; nextln: x86_push v19 -; nextln: x86_push v20 -; nextln: v2 = load.i32 v0 -; nextln: v3 = load.i32 v0+8 -; nextln: v4 = load.i32 v0+16 -; nextln: v5 = load.i32 v0+24 -; nextln: v6 = load.i32 v0+32 -; nextln: v7 = load.i32 v0+40 -; nextln: v8 = load.i32 v0+48 -; nextln: v9 = load.i32 v0+56 -; nextln: v10 = load.i32 v0+64 -; nextln: v11 = load.i32 v0+72 -; nextln: v12 = load.i32 v0+80 -; nextln: v13 = load.i32 v0+88 -; nextln: v14 = load.i32 v0+96 -; nextln: store v2, v1 -; nextln: store v3, v1+8 -; nextln: store v4, v1+16 -; nextln: store v5, v1+24 -; nextln: store v6, v1+32 -; nextln: store v7, v1+40 -; nextln: store v8, v1+48 -; nextln: store v9, v1+56 -; nextln: store v10, v1+64 -; nextln: store v11, v1+72 -; nextln: store v12, v1+80 -; nextln: store v13, v1+88 -; nextln: store v14, v1+96 -; nextln: v26 = x86_pop.i64 -; nextln: v25 = x86_pop.i64 -; nextln: v24 = x86_pop.i64 -; nextln: v23 = x86_pop.i64 -; nextln: v22 = x86_pop.i64 -; nextln: v21 = x86_pop.i64 -; nextln: return v21, v22, v23, v24, v25, v26 -; nextln: } - -; This function requires too many registers and must spill. 
- -function %yes_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.i32 v0+104 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.i32 v15, v1+104 - return -} - -; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; check: ss0 = spill_slot - -; check: block0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]): -; nextln: x86_push v48 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v49 -; nextln: x86_push v50 -; nextln: x86_push v51 -; nextln: x86_push v52 -; nextln: x86_push v53 -; nextln: adjust_sp_down_imm - -; check: spill - -; check: fill - -; check: adjust_sp_up_imm -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: v56 = x86_pop.i64 -; nextln: v55 = x86_pop.i64 -; nextln: v54 = x86_pop.i64 -; nextln: return v54, v55, v56, v57, v58, v59 -; nextln: } - -; A function which uses diverted registers. - -function %divert(i32) -> i32 system_v { -block0(v0: i32): - v2 = iconst.i32 0 - v3 = iconst.i32 1 - jump block1(v0, v3, v2) - -block1(v4: i32, v5: i32, v6: i32): - brz v4, block3 - jump block2 - -block2: - v7 = iadd v5, v6 - v8 = iadd_imm v4, -1 - jump block1(v8, v7, v5) - -block3: - return v5 -} - -; check: function %divert -; check: regmove.i32 v5, %rcx -> %rbx -; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 - -; Stack limit checking - -function %stack_limit(i64 stack_limit) { - ss0 = explicit_slot 168 -block0(v0: i64): - return -} - -; check: function %stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v4: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = iadd_imm v1, 176 -; nextln: v3 = ifcmp_sp v2 -; nextln: trapif uge v3, stk_ovf -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %big_stack_limit(i64 stack_limit) { - ss0 = explicit_slot 40000 -block0(v0: i64): - return -} - -; check: function %big_stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 40000, offset -40016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = ifcmp_sp v1 -; nextln: trapif uge v2, stk_ovf -; nextln: v3 = iadd_imm v1, 0x9c40 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 0x9c40 -; nextln: adjust_sp_up_imm 0x9c40 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -function 
%limit_preamble(i64 vmctx) { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0 - gv2 = load.i64 notrap aligned gv1+4 - stack_limit = gv2 - ss0 = explicit_slot 20 -block0(v0: i64): - return -} - -; check: function %limit_preamble(i64 vmctx [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 20, offset -36 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned gv0 -; nextln: gv2 = load.i64 notrap aligned gv1+4 -; nextln: stack_limit = gv2 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = load.i64 notrap aligned v0 -; nextln: v2 = load.i64 notrap aligned v1+4 -; nextln: v3 = iadd_imm v2, 32 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: adjust_sp_up_imm 32 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif deleted file mode 100644 index 0e123f8a36..0000000000 --- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif +++ /dev/null @@ -1,132 +0,0 @@ -test binemit -set opt_level=speed_and_size -set avoid_div_traps -set baldrdash_prologue_words=3 -set emit_all_ones_funcaddrs -set enable_probestack=false -target x86_64 legacy haswell - -; This checks that a branch that is too far away is getting relaxed. In -; particular, the first block has to be non-empty but its encoding size must be -; zero (i.e. not generate any code). See also issue #666 for more details. - -function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v { - ss0 = incoming_arg 24, offset -24 - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - gv2 = load.i64 notrap aligned readonly gv0 - heap0 = static gv2, min 0xd839_6000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - - block0(v0: i32 [%rdi], v1: i32 [%rsi], v2: i64 [%r14]): -@0005 [-] fallthrough block3(v0, v1) - - block3(v8: i32 [%rdi], v19: i32 [%rsi]): -@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 -@0005 [RexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 -@0005 [trapif#00] trapif ne v91, interrupt -[DynRexOp1umr#89,%rax] v105 = copy v8 -@000b [DynRexOp1r_ib#83,%rax] v10 = iadd_imm v105, 1 - v80 -> v10 -@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 -@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 - v95 -> v93 -@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 -[DynRexOp1umr#89,%rbx] v106 = copy v12 -@0017 [DynRexOp1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 -@001a [DynRexOp1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 -[DynRexOp1umr#89,%rdi] v107 = copy v16 -@001f [DynRexOp1r_ib#83,%rdi] v18 = iadd_imm v107, 32 -[DynRexOp1umr#89,%r8] v108 = copy v19 -@0026 [DynRexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 - v82 -> v21 -@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 -@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 - v55 -> v23 -[DynRexOp1umr#89,%rsi] v109 = copy v23 -@0032 [DynRexOp1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 -@0035 [DynRexOp1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 - v69 -> v27 -[DynRexOp1umr#89,%r9] v110 = copy v27 -@003a [DynRexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 - v68 -> v29 -@0042 [DynRexOp1r_ib#83,%rcx] v31 = iadd_imm v12, -65 -@0045 [DynRexOp1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 -@0048 [DynRexOp1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 -@004c [DynRexOp1r_id#4081,%rcx] v37 = band_imm v35, 255 
-[DynRexOp1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 -@0050 [Op1brib#70] brif sge v97, block6 -@0050 [-] fallthrough block10 - - block10: -[DynRexOp1umr#89,%rcx] v101 = copy v18 -@0054 [Op1jmpb#eb] jump block5(v18, v101) - - block6: -[DynRexOp1umr#89,%rcx] v102 = copy.i32 v16 -@0059 [RexOp1rmov#89] regmove v102, %rcx -> %rdi -@0059 [RexOp1rmov#89] regmove.i32 v16, %rbx -> %rcx -@0059 [-] fallthrough block5(v102, v16) - - block5(v41: i32 [%rdi], v84: i32 [%rcx]): - v83 -> v84 -@005d [DynRexOp1r_id#4081,%rdi] v43 = band_imm v41, 255 -@0062 [DynRexOp1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 - v52 -> v45 -@0065 [RexOp1rmov#89] regmove v45, %rdi -> %rbx -@0065 [DynRexOp1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 - v54 -> v47 -@0068 [RexOp1rmov#89] regmove v47, %rbx -> %rdi -@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 -@0068 [RexOp2urm_noflags#4b6,%r10] v50 = bint.i32 v49 -@0076 [DynRexOp1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 -@0079 [DynRexOp1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 -@007c [DynRexOp1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 -@0080 [DynRexOp1r_id#4081,%rdx] v63 = band_imm v61, 255 -[DynRexOp1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 -@0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx -@0084 [Op1brib#70] brif sge v98, block8 -@0084 [-] fallthrough block11 - - block11: -[DynRexOp1umr#89,%rdx] v103 = copy.i32 v29 -@0088 [Op1jmpb#eb] jump block7(v29, v10, v21, v103) - - block8: -[DynRexOp1umr#89,%rdx] v104 = copy.i32 v27 -@008d [RexOp1rmov#89] regmove v104, %rdx -> %r9 -@008d [RexOp1rmov#89] regmove.i32 v27, %rsi -> %rdx -@008d [-] fallthrough block7(v104, v10, v21, v27) - - block7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]): -@0091 [DynRexOp1r_id#4081,%r9] v71 = band_imm v67, 255 -@0094 [DynRexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 -@0097 [DynRexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 -@0098 [DynRexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 -@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 -@0099 [DynRexOp1rr#21,%r10] v78 = band.i32 v50, v77 -@009a [RexOp1tjccb#74] brz v78, block9 -@009a [-] fallthrough block12 - - block12: -[DynRexOp1umr#89,%rcx] v99 = copy v81 -[DynRexOp1umr#89,%rdx] v100 = copy v79 -@00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi -@00a4 [RexOp1rmov#89] regmove v99, %rcx -> %rsi -@00a4 [Op1jmpd#e9] jump block3(v100, v99); bin: e9 ffffff2d - - block9: -@00a7 [-] fallthrough block4 - - block4: -@00ad [DynRexOp1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 -@00b3 [DynRexOp1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 -@00b4 [DynRexOp1rr#29,%rcx] v90 = isub v86, v89 -@00b5 [-] fallthrough block2(v90) - - block2(v5: i32 [%rcx]): -@00b6 [-] fallthrough block1(v5) - - block1(v3: i32 [%rcx]): -@00b6 [Op1umr#89,%rax] v96 = uextend.i64 v3 -@00b6 [-] fallthrough_return v96 -} diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif deleted file mode 100644 index a26e2d865c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif +++ /dev/null @@ -1,13 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> f32 system_v { -block0: - v0 = iconst.i8 255 -; check: v2 = iconst.i32 255 -; nextln: v0 = ireduce.i8 v2 - v1 = fcvt_from_uint.f32 v0 -; nextln: v3 = uextend.i64 v0 -; nextln: v1 = fcvt_from_sint.f32 v3 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif deleted file mode 100644 index 44b7e32d12..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/select-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(b1, i8, i8) -> i8 { -block0(v0: b1, v1: i8, v2: i8): - v3 = select v0, v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif deleted file mode 100644 index 31b73da391..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif +++ /dev/null @@ -1,18 +0,0 @@ -test shrink -set opt_level=speed_and_size -target x86_64 legacy - -function %test_multiple_uses(i32 [%rdi]) -> i32 { -block0(v0: i32 [%rdi]): -[DynRexOp1rcmp_ib#7083,%rflags] v3 = ifcmp_imm v0, 0 -[Op2seti_abcd#490,%rax] v1 = trueif eq v3 -[RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1 -[Op1brib#70] brif eq v3, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v2 - -block1: -[Op2trap#40b] trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif deleted file mode 100644 index bb787832c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink.clif +++ /dev/null @@ -1,40 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -; Test that instruction shrinking eliminates REX prefixes when possible. - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %test_shrinking(i32) -> i32 { -block0(v0: i32 [ %rdi ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%rdi] v2 = isub v0, v1 ; bin: 29 cf - return v2 -} - -function %test_not_shrinking(i32) -> i32 { -block0(v0: i32 [ %r8 ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%r8] v2 = isub v0, v1 ; bin: 41 29 c8 - return v2 -} - -function %test_not_shrinking_i8() { -block0: -[-,%rsi] v1 = iconst.i8 1 - ; asm: movsbl %sil,%esi -[-,%rsi] v2 = sextend.i32 v1 ; bin: 40 0f be f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v3 = uextend.i32 v1 ; bin: 40 0f b6 f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v4 = uextend.i64 v1 ; bin: 40 0f b6 f6 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif deleted file mode 100644 index 0a8fbe7f0c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ /dev/null @@ -1,116 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %arithmetic_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): -[-, %xmm6] v2 = iadd v0, v1 ; bin: 66 0f fc f2 -[-, %xmm6] v3 = isub v0, v1 ; bin: 66 0f f8 f2 -[-, %xmm6] v4 = sadd_sat v0, v1 ; bin: 66 0f ec f2 -[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2 -[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2 -[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2 -[-, %xmm6] v9 = iabs v1 ; bin: 66 0f 38 1c f2 - - return -} - -function %arithmetic_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]): -[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f fd dd -[-, %xmm3] v3 = isub v0, v1 ; bin: 66 0f f9 dd -[-, %xmm3] v4 = imul v0, v1 ; bin: 66 0f d5 dd -[-, %xmm3] v5 = uadd_sat v0, v1 ; bin: 66 0f dd dd -[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd -[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd -[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd -[-, 
%xmm3] v9 = iabs v1 ; bin: 66 0f 38 1d dd - - return -} - -function %arithmetic_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1 -[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1 -[-, %xmm0] v5 = iabs v1 ; bin: 66 0f 38 1e c1 - - return -} - -function %arithmetic_i64x2(i64x2, i64x2) { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm2]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f d4 c2 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fb c2 - - return -} - -function %arithmetic_i64x2_rex(i64x2, i64x2) { -block0(v0: i64x2 [%xmm8], v1: i64x2 [%xmm10]): -[-, %xmm8] v2 = iadd v0, v1 ; bin: 66 45 0f d4 c2 -[-, %xmm8] v3 = isub v0, v1 ; bin: 66 45 0f fb c2 - - return -} - -function %arithmetic_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 0f 58 dd -[-, %xmm3] v3 = fsub v0, v1 ; bin: 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 0f 51 db - return -} - -function %arithmetic_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 41 0f 58 da -[-, %xmm3] v3 = fsub v0, v1 ; bin: 41 0f 5c da -[-, %xmm3] v4 = fmul v0, v1 ; bin: 41 0f 59 da -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 41 0f 5e da -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 41 0f 5d da -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 41 0f 5f da -[-, %xmm3] v8 = sqrt v1 ; bin: 41 0f 51 da - return -} - -function %arithmetic_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 66 0f 58 dd -[-, %xmm3] v3 = fsub v0, v1 ; bin: 66 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 66 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 66 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 66 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 66 0f 51 db - return -} - -function %arithmetic_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]): -[-, %xmm11] v2 = fadd v0, v1 ; bin: 66 45 0f 58 dd -[-, %xmm11] v3 = fsub v0, v1 ; bin: 66 45 0f 5c dd -[-, %xmm11] v4 = fmul v0, v1 ; bin: 66 45 0f 59 dd -[-, %xmm11] v5 = fdiv v0, v1 ; bin: 66 45 0f 5e dd -[-, %xmm11] v6 = x86_fmin v0, v1 ; bin: 66 45 0f 5d dd -[-, %xmm11] v7 = x86_fmax v0, v1 ; bin: 66 45 0f 5f dd -[-, %xmm11] v8 = sqrt v0 ; bin: 66 45 0f 51 db - return -} - -function %pmuludq(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm5]): -[-, %xmm3] v2 = x86_pmuludq v0, v1 ; bin: 66 0f f4 dd - return v2 -} - -function %pmaddwd(i16x8, i16x8) -> i32x4 { -block0(v0: i16x8 [%xmm8], v1: i16x8 [%xmm9]): -[-, %xmm8] v2 = widening_pairwise_dot_product_s v0, v1 ; bin: 66 45 0f f5 c1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif deleted file mode 100644 index 74bc68ee67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif +++ /dev/null @@ -1,117 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ineg_i32x4() -> b1 { -; check: const0 = 0x00000001000000010000000100000001 -; nextln: const1 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i32x4 [1 1 1 1] - v2 = ineg v0 - ; check: v5 = vconst.i32x4 const1 - ; nextln: v2 = isub v5, 
v0 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, -1 - - return v4 -} - -function %ineg_legalized() { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i8x16 0x00 - v1 = ineg v0 - ; check: v6 = vconst.i8x16 const0 - ; nextln: v1 = isub v6, v0 - - v2 = raw_bitcast.i16x8 v0 - v3 = ineg v2 - ; check: v7 = vconst.i16x8 const0 - ; nextln: v3 = isub v7, v2 - - v4 = raw_bitcast.i64x2 v0 - v5 = ineg v4 - ; check: v8 = vconst.i64x2 const0 - ; nextln: v5 = isub v8, v4 - - return -} - -function %fneg_legalized() { -; check: const2 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0] - v1 = fneg v0 - ; check: v4 = vconst.i32x4 const2 - ; nextln: v5 = ishl_imm v4, 31 - ; nextln: v6 = raw_bitcast.f32x4 v5 - ; nextln: v1 = bxor v0, v6 - - v2 = vconst.f64x2 [0x1.0 0x2.0] - v3 = fneg v2 - ; check: v7 = vconst.i64x2 const2 - ; nextln: v8 = ishl_imm v7, 63 - ; nextln: v9 = raw_bitcast.f64x2 v8 - ; nextln: v3 = bxor v2, v9 - - return -} - -function %fabs_legalized() { -; check: const1 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f64x2 [0x1.0 -0x2.0] - v1 = fabs v0 - ; check: v2 = vconst.i64x2 const1 - ; nextln: v3 = ushr_imm v2, 1 - ; nextln: v4 = raw_bitcast.f64x2 v3 - ; nextln: v1 = band v0, v4 - return -} - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = imul v0, v1 - ; check: v3 = ushr_imm v0, 32 - ; nextln: v4 = x86_pmuludq v3, v1 - ; nextln: v5 = ushr_imm v1, 32 - ; nextln: v6 = x86_pmuludq v5, v0 - ; nextln: v7 = iadd v4, v6 - ; nextln: v8 = ishl_imm v7, 32 - ; nextln: v9 = x86_pmuludq v0, v1 - ; nextln: v2 = iadd v9, v8 - return -} - -function %fmin_f32x4(f32x4, f32x4) { -block0(v0:f32x4, v1:f32x4): - v2 = fmin v0, v1 - ; check: v3 = x86_fmin v0, v1 - ; nextln: v4 = x86_fmin v1, v0 - ; nextln: v5 = bor v4, v3 - ; nextln: v6 = fcmp uno v3, v5 - ; nextln: v7 = raw_bitcast.f32x4 v6 - ; nextln: v8 = bor v5, v7 - ; nextln: v9 = raw_bitcast.i32x4 v7 - ; nextln: v10 = ushr_imm v9, 10 - ; nextln: v11 = raw_bitcast.f32x4 v10 - ; nextln: v2 = band_not v8, v11 - return -} - -function %fmax_f64x2(f64x2, f64x2) { -block0(v0:f64x2, v1:f64x2): - v2 = fmax v0, v1 - ; check: v3 = x86_fmax v0, v1 - ; nextln: v4 = x86_fmax v1, v0 - ; nextln: v5 = bxor v3, v4 - ; nextln: v6 = bor v4, v5 - ; nextln: v7 = fsub v6, v5 - ; nextln: v8 = fcmp uno v5, v7 - ; nextln: v9 = raw_bitcast.i64x2 v8 - ; nextln: v10 = ushr_imm v9, 13 - ; nextln: v11 = raw_bitcast.f64x2 v10 - ; nextln: v2 = band_not v7, v11 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif deleted file mode 100644 index 0daf064713..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif +++ /dev/null @@ -1,17 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2() { -block0: - [-, %xmm1] v0 = vconst.i64x2 [1 2] - [-, %xmm2] v1 = vconst.i64x2 [2 2] - [-, %xmm14] v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2 - ; 62, mandatory EVEX prefix - ; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38 - ; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01 - ; 08 = 0000 1000, everything, LL' indicates 128-bit, V' is unset (inverted, %xmm1 has MSB of 0) - ; 40, opcode (correct) - ; f2 = 1111 0010, ModR/M byte using 0b110 from %xmm14 in reg and 0b010 
from %xmm2 in r/m - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif deleted file mode 100644 index 294902d45b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = imul v0, v1 - ; check: v2 = x86_pmullq v0, v1 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif deleted file mode 100644 index 6f235e6b3b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif +++ /dev/null @@ -1,9 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy has_avx512vl=true - -function %fcvt_from_uint(i32x4) { -block0(v0: i32x4 [%xmm2]): -[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif deleted file mode 100644 index cdadd3254d..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512f=true - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v1 = x86_vcvtudq2ps v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif deleted file mode 100644 index 3131a8aa0c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif +++ /dev/null @@ -1,99 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %ishl_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psll v0, v1 ; bin: 66 0f f1 d1 - return v2 -} - -function %ishl_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psll v0, v1 ; bin: 66 0f f2 e0 - return v2 -} - -function %ishl_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psll v0, v1 ; bin: 66 0f f3 f3 - return v2 -} - -function %ushr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psrl v0, v1 ; bin: 66 0f d1 d1 - return v2 -} - -function %ushr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psrl v0, v1 ; bin: 66 0f d2 e0 - return v2 -} - -function %ushr_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psrl v0, v1 ; bin: 66 0f d3 f3 - return v2 -} - -function %sshr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psra v0, v1 ; bin: 66 0f e1 d1 - return v2 -} - -function %sshr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0 - return v2 -} - -function %ishl_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03 - return v2 -} - -function %ishl_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, 
%xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a - return v2 -} - -function %ishl_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a - return v2 -} - -function %ushr_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03 - return v2 -} - -function %ushr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a - return v2 -} - -function %ushr_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a - return v2 -} - -function %sshr_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03 - return v2 -} - -function %sshr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif deleted file mode 100644 index 7674f83e01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ /dev/null @@ -1,111 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ushr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psrl v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %sshr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = sshr v1, v0 - ; check: v3 = iadd_imm v0, 8 - ; nextln: v4 = bitcast.i64x2 v3 - - ; nextln: v5 = x86_punpckl v1, v1 - ; nextln: v6 = raw_bitcast.i16x8 v5 - ; nextln: v7 = x86_psra v6, v4 - - ; nextln: v8 = x86_punpckh v1, v1 - ; nextln: v9 = raw_bitcast.i16x8 v8 - ; nextln: v10 = x86_psra v9, v4 - - ; nextln: v2 = snarrow v7, v10 - return v2 -} - -function %ishl_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psll v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %ishl_i32x4() -> i32x4 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i32x4 [1 2 4 8] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psll v1, v3 - return v2 -} - -function %ushr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psrl v1, v3 - return v2 -} - -function %sshr_i16x8() -> i16x8 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i16x8 [1 2 4 8 16 32 64 128] - v2 = sshr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psra v1, v3 - return v2 -} - -function %sshr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = sshr v1, v0 - ; check: v3 = x86_pextr v1, 0 - ; nextln: v4 = sshr v3, v0 - ; nextln: v5 = x86_pinsr v1, v4, 0 - ; nextln: v6 = x86_pextr v1, 1 - ; nextln: v7 = sshr v6, v0 - ; nextln: v2 = x86_pinsr 
v5, v7, 1 - return v2 -} - -function %bitselect_i16x8() -> i16x8 { -block0: - v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v2 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v3 = bitselect v0, v1, v2 - ; check: v4 = band v1, v0 - ; nextln: v5 = band_not v2, v0 - ; nextln: v3 = bor v4, v5 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif deleted file mode 100644 index 1d3db4a119..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ /dev/null @@ -1,138 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %icmp_i8x16() { -block0: -[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db -[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 -[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc - return -} - -function %icmp_i16x8_rex() { -block0: -[-, %xmm0] v0 = vconst.i16x8 0x00 -[-, %xmm15] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 41 0f 75 c7 - return -} - -function %icmp_i32x4() { -block0: -[-, %xmm0] v0 = vconst.i32x4 0x00 -[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 - return -} - -function %icmp_i64x2_rex() { -block0: -[-, %xmm8] v0 = vconst.i64x2 0x00 -[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff -[-, %xmm8] v2 = icmp eq v0, v1 ; bin: 66 44 0f 38 29 c1 - return -} - -function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]): -[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1 - return v2 -} - -function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]): -[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3 - return v2 -} - -function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]): -[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5 - return v2 -} - -function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]): -[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7 - return v2 -} - -function %min_max_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]): -[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9 -[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9 -[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9 -[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9 - return -} - -function %min_max_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5 -[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5 - return -} - -function %min_max_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4 -[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4 - return -} - -function %fcmp_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 0f c2 d4 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 0f c2 d4 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 0f c2 d4 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 0f c2 d4 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 0f c2 d4 04 -[-, %xmm2] v7 = fcmp uge 
v0, v1 ; bin: 0f c2 d4 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 0f c2 d4 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 0f c2 d4 07 - return -} - -function %fcmp_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm8], v1: f32x4 [%xmm8]): -[-, %xmm8] v2 = fcmp eq v0, v1 ; bin: 45 0f c2 c0 00 -[-, %xmm8] v3 = fcmp lt v0, v1 ; bin: 45 0f c2 c0 01 -[-, %xmm8] v4 = fcmp le v0, v1 ; bin: 45 0f c2 c0 02 -[-, %xmm8] v5 = fcmp uno v0, v1 ; bin: 45 0f c2 c0 03 -[-, %xmm8] v6 = fcmp ne v0, v1 ; bin: 45 0f c2 c0 04 -[-, %xmm8] v7 = fcmp uge v0, v1 ; bin: 45 0f c2 c0 05 -[-, %xmm8] v8 = fcmp ugt v0, v1 ; bin: 45 0f c2 c0 06 -[-, %xmm8] v9 = fcmp ord v0, v1 ; bin: 45 0f c2 c0 07 - return -} - -function %fcmp_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 0f c2 d0 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 0f c2 d0 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 0f c2 d0 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 0f c2 d0 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 0f c2 d0 04 -[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 0f c2 d0 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 0f c2 d0 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 0f c2 d0 07 - return -} - -function %fcmp_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm9], v1: f64x2 [%xmm11]): -[-, %xmm9] v2 = fcmp eq v0, v1 ; bin: 66 45 0f c2 cb 00 -[-, %xmm9] v3 = fcmp lt v0, v1 ; bin: 66 45 0f c2 cb 01 -[-, %xmm9] v4 = fcmp le v0, v1 ; bin: 66 45 0f c2 cb 02 -[-, %xmm9] v5 = fcmp uno v0, v1 ; bin: 66 45 0f c2 cb 03 -[-, %xmm9] v6 = fcmp ne v0, v1 ; bin: 66 45 0f c2 cb 04 -[-, %xmm9] v7 = fcmp uge v0, v1 ; bin: 66 45 0f c2 cb 05 -[-, %xmm9] v8 = fcmp ugt v0, v1 ; bin: 66 45 0f c2 cb 06 -[-, %xmm9] v9 = fcmp ord v0, v1 ; bin: 66 45 0f c2 cb 07 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif deleted file mode 100644 index a6324a34cc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif +++ /dev/null @@ -1,40 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ne v0, v1 - ; check: v3 = icmp eq v0, v1 - ; nextln: v4 = vconst.b32x4 const0 - ; nextln: v2 = bxor v4, v3 - return v2 -} - -function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ugt v0, v1 - ; check: v3 = x86_pmaxu v0, v1 - ; nextln: v4 = icmp eq v3, v1 - ; nextln: v5 = vconst.b32x4 const0 - ; nextln: v2 = bxor v5, v4 - return v2 -} - -function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8, v1: i16x8): - v2 = icmp sge v0, v1 - ; check: v3 = x86_pmins v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} - -function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16, v1: i8x16): - v2 = icmp uge v0, v1 - ; check: v3 = x86_pminu v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif deleted file mode 100644 index f26b436931..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ /dev/null @@ -1,26 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy nehalem - -; Ensure raw_bitcast emits no instructions. 
-function %raw_bitcast_i16x8_to_b32x4() { -block0: -[-, %rbx] v0 = bconst.b16 true -[-, %xmm2] v1 = scalar_to_vector.b16x8 v0 -[-, %xmm2] v2 = raw_bitcast.i32x4 v1 ; bin: - return -} - -function %conversions_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6 -[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 - return -} - -function %conversions_i16x8(i16x8) { -block0(v0: i16x8 [%xmm6]): -[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6 -[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif deleted file mode 100644 index 6de14e181a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif +++ /dev/null @@ -1,70 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v2 = raw_bitcast.i16x8 v0 - ; nextln: v3 = vconst.i16x8 const0 - ; nextln: v4 = x86_pblendw v3, v2, 85 - ; nextln: v5 = raw_bitcast.i32x4 v4 - ; nextln: v6 = isub v0, v5 - ; nextln: v7 = fcvt_from_sint.f32x4 v5 - ; nextln: v8 = ushr_imm v6, 1 - ; nextln: v9 = fcvt_from_sint.f32x4 v8 - ; nextln: v10 = fadd v9, v9 - ; nextln: v1 = fadd v10, v7 - return v1 -} - -function %fcvt_to_sint_sat(f32x4) -> i32x4 { -block0(v0:f32x4): - v1 = fcvt_to_sint_sat.i32x4 v0 - ; check: v2 = fcmp eq v0, v0 - ; nextln: v3 = raw_bitcast.f32x4 v2 - ; nextln: v4 = band v0, v3 - ; nextln: v5 = bxor v3, v0 - ; nextln: v6 = raw_bitcast.i32x4 v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v4 - ; nextln: v8 = band v6, v7 - ; nextln: v9 = sshr_imm v8, 31 - ; nextln: v1 = bxor v7, v9 - return v1 -} - -function %fcvt_to_uint_sat(f32x4) -> i32x4 { -; check: const0 = 0x00000000000000000000000000000000 -; nextln: const1 = 0x4f0000004f0000004f0000004f000000 -block0(v0:f32x4): - v1 = fcvt_to_uint_sat.i32x4 v0 - ; check: v2 = vconst.f32x4 const0 - ; nextln: v3 = vconst.f32x4 const1 - ; nextln: v4 = x86_fmax v0, v2 - ; nextln: v5 = fsub v4, v3 - ; nextln: v6 = fcmp le v3, v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v5 - ; nextln: v8 = raw_bitcast.i32x4 v6 - ; nextln: v9 = bxor v7, v8 - ; nextln: v10 = raw_bitcast.i32x4 v2 - ; nextln: v11 = x86_pmaxs v9, v10 - ; nextln: v12 = x86_cvtt2si.i32x4 v4 - ; nextln: v1 = iadd v12, v11 - return v1 -} - -function %uwiden_high(i8x16) -> i16x8 { -block0(v0: i8x16): - v1 = uwiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = uwiden_low v2 - return v1 -} - -function %swiden_high(i16x8) -> i32x4 { -block0(v0: i16x8): - v1 = swiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = swiden_low v2 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif deleted file mode 100644 index 6240a08557..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif +++ /dev/null @@ -1,34 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag. 
- -function %scalar_to_vector_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 - return -} - -function %scalar_to_vector_i16() { -block0: -[-, %rbx] v0 = iconst.i16 42 -[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 - return -} - -function %scalar_to_vector_b32() { -block0: -[-, %rcx] v0 = bconst.b32 false -[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 - return -} - -function %scalar_to_vector_i64() { -block0: -[-, %rdx] v0 = iconst.i64 42 -[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 48 0f 6e fa - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif deleted file mode 100644 index a8c14a6342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ /dev/null @@ -1,126 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -; for insertlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested - -function %insertlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %rbx] v1 = bconst.b8 false -[-, %xmm0] v2 = splat.b8x16 v0 -[-, %xmm0] v3 = x86_pinsr v2, v1, 10 ; bin: 66 0f 3a 20 c3 0a - return -} - -function %insertlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %rbx] v1 = iconst.i16 5 -[-, %xmm1] v2 = splat.i16x8 v0 -[-, %xmm1] v3 = x86_pinsr v2, v1, 4 ; bin: 66 0f c4 cb 04 - return -} - -function %insertlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %rbx] v1 = iconst.i32 99 -[-, %xmm4] v2 = splat.i32x4 v0 -[-, %xmm4] v3 = x86_pinsr v2, v1, 2 ; bin: 66 0f 3a 22 e3 02 - return -} - -function %insertlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 true -[-, %rbx] v1 = bconst.b64 false -[-, %xmm2] v2 = splat.b64x2 v0 -[-, %xmm2] v3 = x86_pinsr v2, v1, 1 ; bin: 66 48 0f 3a 22 d3 01 - return -} - -; for extractlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested - -function %extractlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = splat.b8x16 v0 -[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a - return -} - -function %extractlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %xmm1] v1 = splat.i16x8 v0 -[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 - return -} - -function %extractlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm4] v1 = splat.i32x4 v0 -[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 - return -} - -function %extractlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 false -[-, %xmm2] v1 = splat.b64x2 v0 -[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 - return -} - -;; shuffle - -function %pshufd() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 0f 6e c0 -[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 - return -} - -function %pshufb() { -block0: -[-, %rax] v0 = iconst.i8 42 -[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 0f 6e c0 -[-, %rbx] v2 = iconst.i8 43 -[-, %xmm12] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 44 0f 6e e3 -[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4 - return -} - -;; blend - -function %pblendw(b16x8, b16x8) { -block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]): -[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55 ; bin: 66 44 0f 3a 0e 
d2 55 - return -} - -;; pack/unpack - -function %unpack_high_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]): -[-, %xmm0] v2 = x86_punpckh v0, v1 ; bin: 66 41 0f 68 c4 - return -} - -function %unpack_low_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]): -[-, %xmm7] v2 = x86_punpckl v0, v1 ; bin: 66 0f 62 fe - return -} - -function %narrowing_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]): -[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8 -[-, %xmm7] v3 = unarrow v0, v1 ; bin: 66 41 0f 67 f8 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif deleted file mode 100644 index 91ff8eb9a0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -set opt_level=speed_and_size -set enable_probestack=false -set enable_simd -target x86_64 legacy - -; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) -function %scalar_to_vector_f32() -> f32x4 baldrdash_system_v { -block0: - v0 = f32const 0x0.42 - v1 = scalar_to_vector.f32x4 v0 - return v1 -} - -; check: block0 -; nextln: v2 = iconst.i32 0x3e84_0000 -; nextln: v0 = bitcast.f32 v2 -; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 -; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif deleted file mode 100644 index 284ef35180..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif +++ /dev/null @@ -1,101 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -;; shuffle - -function %shuffle_different_ssa_values() -> i8x16 { -; check: const2 = 0x80000000000000000000000000000000 -; nextln: const3 = 0x01808080808080808080808080808080 -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 0x01 - v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 - return v2 -} -; check: v1 = vconst.i8x16 const1 -; nextln: v3 = vconst.i8x16 const2 -; nextln: v4 = x86_pshufb v0, v3 -; nextln: v5 = vconst.i8x16 const3 -; nextln: v6 = x86_pshufb v1, v5 -; nextln: v2 = bor v4, v6 - -function %shuffle_same_ssa_value() -> i8x16 { -; check: const1 = 0x03000000000000000000000000000000 -block0: - v1 = vconst.i8x16 0x01 - v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 - return v2 -} -; check: v1 = vconst.i8x16 const0 -; nextln: v3 = vconst.i8x16 const1 -; nextln: v2 = x86_pshufb v1, v3 - -;; splat - -function %splat_i32() -> i32x4 { -block0: - v0 = iconst.i32 42 - v1 = splat.i32x4 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i32 42 -; nextln: v2 = scalar_to_vector.i32x4 v0 -; nextln: v1 = x86_pshufd v2, 0 -; nextln: return v1 -; nextln: } - -function %splat_i64() -> i64x2 { -block0: - v0 = iconst.i64 42 - v1 = splat.i64x2 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i64 42 -; nextln: v2 = scalar_to_vector.i64x2 v0 -; nextln: v1 = x86_pinsr v2, v0, 1 -; nextln: return v1 - -function %splat_b16() -> b16x8 { -block0: - v0 = bconst.b16 true - v1 = splat.b16x8 v0 - return v1 -} -; check: block0: -; nextln: v0 = bconst.b16 true -; nextln: v2 = scalar_to_vector.b16x8 v0 -; nextln: v3 = x86_pinsr v2, v0, 1 -; nextln: v4 = raw_bitcast.i32x4 v3 -; nextln: v5 
= x86_pshufd v4, 0 -; nextln: v1 = raw_bitcast.b16x8 v5 -; nextln: return v1 - -function %splat_i8() -> i8x16 { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = iconst.i8 42 - v1 = splat.i8x16 v0 - return v1 -} -; check: block0: -; nextln: v2 = iconst.i32 42 -; nextln: v0 = ireduce.i8 v2 -; nextln: v3 = scalar_to_vector.i8x16 v0 -; nextln: v4 = vconst.i8x16 const0 -; nextln: v1 = x86_pshufb v3, v4 -; nextln: return v1 - -function %swizzle() -> i8x16 { -; check: const1 = 0x70707070707070707070707070707070 -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = swizzle.i8x16 v0, v1 - ; check: v3 = vconst.i8x16 const1 - ; nextln: v4 = uadd_sat v1, v3 - ; nextln: v2 = x86_pshufb v0, v4 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif deleted file mode 100644 index af5ca0fe63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif +++ /dev/null @@ -1,33 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %bor_b16x8(b16x8, b16x8) -> b16x8 { -block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]): -[-, %xmm2] v2 = bor v0, v1 ; bin: 66 0f eb d1 - return v2 -} - -function %band_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm6] v2 = band v0, v1 ; bin: 66 0f db f3 - return v2 -} - -function %bxor_b32x4(b32x4, b32x4) -> b32x4 { -block0(v0: b32x4 [%xmm4], v1: b32x4 [%xmm0]): -[-, %xmm4] v2 = bxor v0, v1 ; bin: 66 0f ef e0 - return v2 -} - -function %band_not_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de - return v2 -} - -function %x86_ptest_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]): -[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif deleted file mode 100644 index 5e5bb7ac43..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: b32x4): - v1 = bnot v0 - ; check: v2 = vconst.b32x4 const0 - ; nextln: v1 = bxor v2, v0 - return v1 -} - -function %vany_true_b32x4(b32x4) -> b1 { -block0(v0: b32x4): - v1 = vany_true v0 - ; check: v2 = x86_ptest v0, v0 - ; nextln: v1 = trueif ne v2 - return v1 -} - -function %vall_true_i64x2(i64x2) -> b1 { -; check: const0 = 0x00000000000000000000000000000000 -block0(v0: i64x2): - v1 = vall_true v0 - ; check: v2 = vconst.i64x2 const0 - ; nextln: v3 = icmp eq v0, v2 - ; nextln: v4 = x86_ptest v3, v3 - ; nextln: v1 = trueif eq v4 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif deleted file mode 100644 index 6b6b91a915..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif +++ /dev/null @@ -1,11 +0,0 @@ -test rodata -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -block0(v0: b32x4): - v1 = bnot v0 - return v1 -} - -; sameln: [FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF] diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif deleted file mode 100644 index 4f8b050d01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif +++ /dev/null @@ -1,85 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %load_store_simple(i64) { -block0(v0: i64 [%rax]): -[-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00 -[-] store v10, v0 ; bin: heap_oob 0f 11 00 - - ; use REX prefix -[-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v12, v0 ; bin: heap_oob 44 0f 11 00 - - return -} - -function %load_store_with_displacement(i64) { -block0(v0: i64 [%rax]): - ; use 8-bit displacement -[-, %xmm0] v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a -[-] store v1, v0+42 ; bin: heap_oob 0f 11 40 2a - - ; use 8-bit displacement with REX prefix -[-, %xmm8] v2 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v2, v0 ; bin: heap_oob 44 0f 11 00 - - ; use 32-bit displacement -[-, %xmm0] v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100 -[-] store v3, v0+256 ; bin: heap_oob 0f 11 80 00000100 - - ; use 32-bit displacement with REX prefix -[-, %xmm8] v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100 -[-] store v4, v0+256 ; bin: heap_oob 44 0f 11 80 00000100 - - return -} - -function %load_store_complex(i64, i64) { -block0(v0: i64 [%rax], v1: i64 [%rbx]): - ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows - ; %rax and %rbx form the SIB 0x18 -[-, %xmm1] v10 = load_complex.f64x2 v0+v1 ; bin: heap_oob 40 0f 10 0c 18 - ; enabling bit 6 of the ModR/M byte indicates a disp8 follows -[-] store_complex v10, v0+v1+5 ; bin: heap_oob 40 0f 11 4c 18 05 - - return -} - -function %copy_to_ssa() { -block0: -[-, %xmm1] v0 = copy_to_ssa.i64x2 %xmm3 ; bin: 40 0f 28 cb -[-, %xmm2] v1 = copy_to_ssa.i64x2 %xmm15 ; bin: 41 0f 28 d7 - - return -} - -function %uload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = uload8x8 v1+0 ; bin: heap_oob 66 0f 38 30 12 - [-,%xmm2] v4 = uload8x8 v1+20 ; bin: heap_oob 66 0f 38 30 52 14 - [-,%xmm2] v5 = uload8x8 v1+256 ; bin: heap_oob 66 0f 38 30 92 00000100 - [-,%xmm2] v6 = uload16x4 v1+0 ; bin: heap_oob 66 0f 38 33 12 - [-,%xmm2] v7 = uload16x4 v1+20 ; bin: heap_oob 66 0f 38 33 52 14 - [-,%xmm2] v8 = uload16x4 v1+256 ; bin: heap_oob 66 0f 38 33 92 00000100 - [-,%xmm10] v9 = uload32x2 v1+0 ; bin: heap_oob 66 44 0f 38 35 12 - [-,%xmm10] v10 = uload32x2 v1+20 ; bin: heap_oob 66 44 0f 38 35 52 14 - [-,%xmm10] v11 = uload32x2 v1+256 ; bin: heap_oob 66 44 0f 38 35 92 00000100 - return -} - -function %sload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = sload8x8 v1+0 ; bin: heap_oob 66 0f 38 20 12 - [-,%xmm2] v4 = sload8x8 v1+20 ; bin: heap_oob 66 0f 38 20 52 14 - [-,%xmm2] v5 = sload8x8 v1+256 ; bin: heap_oob 66 0f 38 20 92 00000100 - [-,%xmm10] v6 = sload16x4 v1+0 ; bin: heap_oob 66 44 0f 38 23 12 - [-,%xmm10] v7 = sload16x4 v1+20 ; bin: heap_oob 66 44 0f 38 23 52 14 - [-,%xmm10] v8 = sload16x4 v1+256 ; bin: heap_oob 66 44 0f 38 23 92 00000100 - [-,%xmm2] v9 = sload32x2 v1+0 ; bin: heap_oob 66 0f 38 25 12 - [-,%xmm2] v10 = sload32x2 v1+20 ; bin: heap_oob 66 0f 38 25 52 14 - [-,%xmm2] v11 = sload32x2 v1+256 ; bin: heap_oob 66 0f 38 25 92 00000100 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif 
deleted file mode 100644 index 4141a05b32..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif +++ /dev/null @@ -1,22 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v { - ss0 = explicit_slot 32, offset -48 - ss1 = spill_slot 16, offset -64 - ss2 = incoming_arg 16, offset -16 - sig0 = () system_v - fn0 = colocated u0:2 sig0 - -block0(v5: i64 [%rbp]): -[-] x86_push v5 -[-] copy_special %rsp -> %rbp -[-] adjust_sp_down_imm 48 -[-,%rax] v0 = stack_addr.i64 ss0 -[-,%xmm15] v4 = load.i32x4 v0 -[-,%rax] v2 = x86_pextr v4, 1 ; bin: 66 44 0f 3a 16 f8 01 -[-] adjust_sp_up_imm 48 -[-] v6 = x86_pop.i64 -[-] return v2, v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif deleted file mode 100644 index 23aee87655..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif +++ /dev/null @@ -1,29 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -function %vconst_b8() { -block0: -[-, %xmm2] v0 = vconst.b8x16 0x01 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.b8x16 0x02 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %vconst_with_preamble() { -const42 = i32x4 [1 0 0 0] -const43 = i32x4 [2 0 0 0] - -block0: -[-, %xmm2] v0 = vconst.i32x4 const42 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.i32x4 const43 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %address_of_vconst() { -const42 = i32x4 [1 0 0 0] - -block0: -[-, %rax] v0 = const_addr.i64 const42 ; bin: 48 8d 05 00000001 PCRelRodata4(8) - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif deleted file mode 100644 index 477984b344..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -set enable_simd=true -set enable_probestack=false -target x86_64 legacy haswell - -; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) -function %vconst_i32() -> i32x4 baldrdash_system_v { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} -; check: const0 = 0x00000000000000000000000000001234 -; check: block0: -; nextln: v0 = vconst.i32x4 const0 -; nextln: return v0 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif deleted file mode 100644 index 07fa364752..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif +++ /dev/null @@ -1,10 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy - -function %vconst_optimizations() { -block0: -[-, %xmm4] v0 = vconst.b8x16 0x00 ; bin: 66 0f ef e4 -[-, %xmm7] v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 ff - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif deleted file mode 100644 index e7e63e65ea..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif +++ /dev/null @@ -1,49 +0,0 @@ -test rodata -set enable_simd=true -target x86_64 legacy haswell - -function %vconst_i32() -> i32x4 { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} - -; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0] - -function %vconst_b16() -> b16x8 { -block0: - v0 = vconst.b16x8 [true false true false true false true true] - return v0 -} - -; sameln: [FF, FF, 0, 0, FF, FF, 0, 0, FF, FF, 0, 0, FF, FF, FF, FF] - - -; Since both jump tables and constants are emitted after the function body, it is important that they do not interfere. -; This test shows that even in the presence of jump tables, constants are emitted correctly -function %vconst_with_jumptables() { -jt0 = jump_table [block0] - -block10: - v10 = iconst.i64 0 - br_table v10, block1, jt0 -block0: - jump block11 -block1: - jump block11 -block11: - v11 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16] - return -} - -; sameln: [1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, 10] - -function %vconst_preamble() -> b16x8 { -const42 = i32x4 [0 1 2 3] -const43 = i32x4 [4 5 6 7] -block0: - v0 = vconst.b16x8 const42 - return v0 -} - -; sameln: [0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0] diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif deleted file mode 100644 index 275a5e4411..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif +++ /dev/null @@ -1,27 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function %vselect_i8x16(b8x16, i8x16, i8x16) { -block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) { -block0(v0: b16x8 [%xmm0], v1: i16x8 [%xmm3], v2: i16x8 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) { -block0(v0: b32x4 [%xmm0], v1: i32x4 [%xmm3], v2: i32x4 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 14 eb - return -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) { -block0(v0: b64x2 [%xmm0], v1: i64x2 [%xmm3], v2: i64x2 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 15 eb - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif deleted file mode 100644 index 648b3f5584..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif +++ /dev/null @@ -1,45 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy - -;; Test if vselect gets legalized if BLEND* instructions are not available - -function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 { -block0(v0: b8x16, v1: i8x16, v2: i8x16): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i8x16 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { -block0(v0: b16x8, v1: i16x8, v2: i16x8): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i16x8 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 { -block0(v0: b32x4, v1: i32x4, v2: i32x4): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i32x4 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 { -block0(v0: b64x2, v1: i64x2, v2: i64x2): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i64x2 v0 - ; 
nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif deleted file mode 100644 index f06b3ec0eb..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif +++ /dev/null @@ -1,33 +0,0 @@ -; binary emission of stack address instructions on i686. -test binemit -set opt_level=none -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr32.clif | llvm-mc -show-encoding -triple=i686 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i32 ss0 ; bin: 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i32 ss1 ; bin: 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i32 ss2 ; bin: 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i32 ss3 ; bin: 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i32 ss4 ; bin: 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i32 ss5 ; bin: 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i32 ss4+1 ; bin: 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i32 ss4+2 ; bin: 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i32 ss4+2048 ; bin: 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i32 ss4-4096 ; bin: 8d 8c 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif deleted file mode 100644 index 5b8d5d7ab7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif +++ /dev/null @@ -1,45 +0,0 @@ -; binary emission of stack address instructions on x86-64. 
-test binemit -set opt_level=none -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i64 ss0 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i64 ss1 ; bin: 48 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i64 ss2 ; bin: 48 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i64 ss3 ; bin: 48 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i64 ss4 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i64 ss5 ; bin: 48 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i64 ss4+1 ; bin: 48 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i64 ss4+2 ; bin: 48 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i64 ss4+2048 ; bin: 48 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i64 ss4-4096 ; bin: 48 8d 8c 24 fffff808 - -[-,%r8] v50 = stack_addr.i64 ss0 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v51 = stack_addr.i64 ss1 ; bin: 4c 8d 84 24 00000408 -[-,%r8] v52 = stack_addr.i64 ss2 ; bin: 4c 8d 84 24 00000008 -[-,%r8] v53 = stack_addr.i64 ss3 ; bin: 4c 8d 84 24 00000000 -[-,%r8] v54 = stack_addr.i64 ss4 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v55 = stack_addr.i64 ss5 ; bin: 4c 8d 84 24 00000c08 - -[-,%r8] v70 = stack_addr.i64 ss4+1 ; bin: 4c 8d 84 24 00000809 -[-,%r8] v71 = stack_addr.i64 ss4+2 ; bin: 4c 8d 84 24 0000080a -[-,%r8] v72 = stack_addr.i64 ss4+2048 ; bin: 4c 8d 84 24 00001008 -[-,%r8] v73 = stack_addr.i64 ss4-4096 ; bin: 4c 8d 84 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif deleted file mode 100644 index 508fae04d2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif +++ /dev/null @@ -1,21 +0,0 @@ -; legalization of stack load and store instructions on x86-64. 
-test legalizer -set opt_level=none -target x86_64 legacy haswell - -function %stack_load_and_store() { - ss0 = explicit_slot 8, offset 0 - -block0: - v0 = stack_load.i64 ss0 - -; check: v1 = stack_addr.i64 ss0 -; check: v0 = load.i64 notrap aligned v1 - - stack_store.i64 v0, ss0 - -; check: v2 = stack_addr.i64 ss0 -; check: store notrap aligned v0, v2 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif deleted file mode 100644 index 0a9f973fac..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 { - ss0 = explicit_slot 1 - -block0(v0: i8): - stack_store v0, ss0 - ; check: v2 = stack_addr.i64 ss0 - ; nextln: v3 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v3, v2 - - v1 = stack_load.i8 ss0 - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v5 = uload8.i32 notrap aligned v4 - ; nextln: v1 = ireduce.i8 v5 - - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/struct-arg.clif b/cranelift/filetests/filetests/isa/x86/struct-arg.clif deleted file mode 100644 index 8358e8633a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/struct-arg.clif +++ /dev/null @@ -1,117 +0,0 @@ -test compile -set is_pic -target x86_64 legacy - -function u0:0(i64 sarg(64)) -> i8 system_v { -block0(v0: i64): - v1 = load.i8 v0 - return v1 -} - -; check: function u0:0(sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v3: sarg_t [ss0], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: v0 -> v2 -; nextln: [RexOp2ld#4b6,%rax] v4 = uload8.i32 v2 -; nextln: [null#00,%rax] v1 = ireduce.i8 v4 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:1(i64, i64 sarg(64)) -> i8 system_v { -block0(v0: i64, v1: i64): - v2 = load.i8 v1 - return v2 -} - -; check: function u0:1(i64 [%rdi], sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v0: i64 [%rdi], v4: sarg_t [ss0], v6: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v6 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: v1 -> v3 -; nextln: [RexOp2ld#4b6,%rax] v5 = uload8.i32 v3 -; nextln: [null#00,%rax] v2 = ireduce.i8 v5 -; nextln: [RexOp1popq#58,%rbp] v7 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v7 -; nextln: } - - -function u0:2(i64) -> i8 system_v { -fn1 = u0:0(i64 sarg(64)) -> i8 system_v - -block0(v0: i64): - v1 = call fn1(v0) - return v1 -} - -; check: function u0:2(i64 [%rdi], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 64 -; 
nextln: [RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v3 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v0, %rdi -> %rsi -; nextln: [RexOp1rmov#8089] regmove v2, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v3, %rcx -> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v2, v0, v3) -; nextln: [dummy_sarg_t#00,ss0] v4 = dummy_sarg_t -; nextln: [Op1call_plt_id#e8,%rax] v1 = call fn1(v4) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 64 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:3(i64, i64) -> i8 system_v { -fn1 = u0:0(i64, i64 sarg(64)) -> i8 system_v - -block0(v0: i64, v1: i64): - v2 = call fn1(v0, v1) - return v2 -} - -; check: function u0:3(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%r15]) -> i8 [%rax], i64 fp [%rbp], i64 csr [%r15] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = spill_slot 8, offset -32 -; nextln: ss2 = incoming_arg 24, offset -24 -; nextln: sig0 = (i64 [%rdi], sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v6: i64 [%rdi], v1: i64 [%rsi], v8: i64 [%rbp], v9: i64 [%r15]): -; nextln: [RexOp1pushq#50] x86_push v8 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pushq#50] x86_push v9 -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 72 -; nextln: [RexOp1spillSib32#8089,ss1] v0 = spill v6 -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v4 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v3, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v4, %rcx -> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v3, v1, v4) -; nextln: [dummy_sarg_t#00,ss0] v5 = dummy_sarg_t -; nextln: [RexOp1fillSib32#808b,%r15] v7 = fill v0 -; nextln: [RexOp1rmov#8089] regmove v7, %r15 -> %rdi -; nextln: [Op1call_plt_id#e8,%rax] v2 = call fn1(v7, v5) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 72 -; nextln: [RexOp1popq#58,%r15] v11 = x86_pop.i64 -; nextln: [RexOp1popq#58,%rbp] v10 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v10, v11 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif deleted file mode 100644 index c5144bfd97..0000000000 --- a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif +++ /dev/null @@ -1,205 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-linux legacy haswell - -; check the unwind information with a function with no args -function %no_args() system_v { -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000006 (end_addr = 0x0000000000000006) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: 
DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with medium-sized stack alloc -function %medium_stack() system_v { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with large-sized stack alloc -function %large_stack() system_v { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop -; nextln: - -; check a function that has CSRs -function %lots_of_registers(i64, i64) system_v { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000044 -; nextln: 
CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000074 (end_addr = 0x0000000000000074) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_offset (r3, 3) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r12, 4) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r13, 5) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r14, 6) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r15, 7) -; nextln: DW_CFA_advance_loc (94) -; nextln: DW_CFA_same_value (r15) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r14) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r13) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r12) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r3) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif deleted file mode 100644 index 2c957e0b9a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_elf.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=elf_gd -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: [elf_tls_get_addr#00,%rax] v1 = x86_elf_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif deleted file mode 100644 index d3481a15bf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_enc.clif +++ /dev/null @@ -1,11 +0,0 @@ -test binemit -target x86_64 legacy - -function u0:0() -> i64, i64 { -gv0 = symbol colocated tls u1:0 - -block0: - [-, %rax] v0 = x86_elf_tls_get_addr gv0 ; bin: 66 48 8d 3d ElfX86_64TlsGd(u1:0-4) 00000000 66 66 48 e8 CallPLTRel4(%ElfTlsGetAddr-4) 00000000 - [-, %rax] v1 = x86_macho_tls_get_addr gv0; bin: 48 8b 3d MachOX86_64Tlv(u1:0-4) 00000000 ff 17 - return v0, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif deleted file mode 100644 index 3747ac9f05..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_macho.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=macho -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: [macho_tls_get_addr#00,%rax] v1 = x86_macho_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif deleted 
file mode 100644 index 931b6e0aca..0000000000 --- a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i16 fast { -block0(v0: i8): - v1 = uextend.i16 v0 - return v1 -} - -function u0:1(i8) -> i16 fast { -block0(v0: i8): - v1 = sextend.i16 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif deleted file mode 100644 index 13cf504d13..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ /dev/null @@ -1,255 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; check if for one arg we use the right register -function %one_arg(i64) windows_fastcall { -block0(v0: i64): - return -} -; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]): -; nextln: x86_push v1 -; nextln: copy_special %rsp -> %rbp -; nextln: v2 = x86_pop.i64 -; nextln: return v2 -; nextln: } - -; check if we still use registers for 4 arguments -function %four_args(i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - return -} -; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if float arguments are passed through XMM registers -function %four_float_args(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): - return -} -; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if we use stack space for > 4 arguments -function %five_args(i64, i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): - return -} -; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]): -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -; check that we preserve xmm6 and above if we're using them locally -function %float_callee_saves(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): -; explicitly use a callee-save register -[-, %xmm6] v4 = fadd v0, v1 -[-, %xmm7] v5 = fadd v0, v1 - return -} -; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall { -; nextln: ss0 = incoming_arg 48, offset -48 -; check: 
block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]): -; nextln: x86_push v7 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: store notrap aligned v8, v6+16 -; nextln: store notrap aligned v9, v6 -; nextln: v11 = load.f64x2 notrap aligned v6+16 -; nextln: v12 = load.f64x2 notrap aligned v6 -; nextln: adjust_sp_up_imm 32 -; nextln: v10 = x86_pop.i64 -; nextln: return v10, v11, v12 -; nextln: } - -function %mixed_int_float(i64, f64, i64, f32) windows_fastcall { -block0(v0: i64, v1: f64, v2: i64, v3: f32): - return -} -; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall { -block0(v0: f32, v1: f64, v2: i64, v3: i64): - return v1 -} -; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: regmove v1, %xmm1 -> %xmm0 -; nextln: v5 = x86_pop.i64 -; nextln: return v1, v5 -; nextln: } - -function %ret_val_i128(i64, i64) -> i128 windows_fastcall { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - return v2 -} -; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall { - -; check if i128 is passed by reference -function %i128_arg(i128) windows_fastcall { -block0(v0: i128): - return -} -; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -; check if vector types are passed by reference -function %i32x4_arg(i32x4) windows_fastcall { -block0(v0: i32x4): - return -} -; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall { - fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall - fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall -block0(v0: i64): - v1 = load.i64 v0+0 - v2 = load.i64 v0+8 - v3 = load.i64 v0+16 - v4 = load.i64 v0+24 - v5 = load.i64 v0+32 - v6 = load.i64 v0+40 - v7 = load.i64 v0+48 - v8 = load.i64 v0+56 - v9 = load.i64 v0+64 - v10 = call fn0(v1, v2, v3, v4) - store.i64 v1, v0+8 - store.i64 v2, v0+16 - store.i64 v3, v0+24 - store.i64 v4, v0+32 - store.i64 v5, v0+40 - store.i64 v6, v0+48 - store.i64 v7, v0+56 - store.i64 v8, v0+64 - store.i64 v9, v0+72 - return v10 -} -; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall { -; nextln: ss0 = spill_slot 8, offset -56 -; nextln: ss1 = spill_slot 8, offset -64 -; nextln: ss2 = spill_slot 8, offset -72 -; nextln: ss3 = spill_slot 8, offset -80 -; nextln: ss4 = spill_slot 8, offset -88 -; nextln: ss5 = spill_slot 8, offset -96 -; nextln: ss6 = spill_slot 8, offset -104 -; nextln: ss7 = 
spill_slot 8, offset -112 -; nextln: ss8 = spill_slot 8, offset -120 -; nextln: ss9 = spill_slot 8, offset -128 -; nextln: ss10 = incoming_arg 48, offset -48 -; nextln: ss11 = explicit_slot 32, offset -160 -; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: fn0 = %foo sig0 -; nextln: fn1 = %foo2 sig1 -; check: block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]): -; nextln: x86_push v52 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v53 -; nextln: x86_push v54 -; nextln: x86_push v55 -; nextln: x86_push v56 -; nextln: adjust_sp_down_imm 112 -; nextln: v0 = spill v11 -; nextln: v12 = copy_to_ssa.i64 %rcx -; nextln: v13 = load.i64 v12 -; nextln: v1 = spill v13 -; nextln: v14 = fill_nop v0 -; nextln: v15 = load.i64 v14+8 -; nextln: v2 = spill v15 -; nextln: v16 = fill_nop v0 -; nextln: v17 = load.i64 v16+16 -; nextln: v3 = spill v17 -; nextln: v18 = fill_nop v0 -; nextln: v19 = load.i64 v18+24 -; nextln: v4 = spill v19 -; nextln: v20 = fill_nop v0 -; nextln: v21 = load.i64 v20+32 -; nextln: v5 = spill v21 -; nextln: v22 = fill_nop v0 -; nextln: v23 = load.i64 v22+40 -; nextln: v6 = spill v23 -; nextln: v24 = fill_nop v0 -; nextln: v25 = load.i64 v24+48 -; nextln: v7 = spill v25 -; nextln: v26 = fill_nop v0 -; nextln: v27 = load.i64 v26+56 -; nextln: v8 = spill v27 -; nextln: v28 = fill_nop v0 -; nextln: v29 = load.i64 v28+64 -; nextln: v9 = spill v29 -; nextln: v30 = fill v1 -; nextln: v31 = fill v2 -; nextln: v32 = fill v3 -; nextln: v33 = fill v4 -; nextln: regmove v30, %r15 -> %rcx -; nextln: regmove v31, %r14 -> %rdx -; nextln: regmove v32, %r13 -> %r8 -; nextln: regmove v33, %r12 -> %r9 -; nextln: v10 = call fn0(v30, v31, v32, v33) -; nextln: v34 = fill v1 -; nextln: v35 = fill v0 -; nextln: store v34, v35+8 -; nextln: v36 = fill v2 -; nextln: v37 = fill_nop v0 -; nextln: store v36, v37+16 -; nextln: v38 = fill v3 -; nextln: v39 = fill_nop v0 -; nextln: store v38, v39+24 -; nextln: v40 = fill v4 -; nextln: v41 = fill_nop v0 -; nextln: store v40, v41+32 -; nextln: v42 = fill v5 -; nextln: v43 = fill_nop v0 -; nextln: store v42, v43+40 -; nextln: v44 = fill v6 -; nextln: v45 = fill_nop v0 -; nextln: store v44, v45+48 -; nextln: v46 = fill v7 -; nextln: v47 = fill_nop v0 -; nextln: store v46, v47+56 -; nextln: v48 = fill v8 -; nextln: v49 = fill_nop v0 -; nextln: store v48, v49+64 -; nextln: v50 = fill v9 -; nextln: v51 = fill_nop v0 -; nextln: store v50, v51+72 -; nextln: adjust_sp_up_imm 112 -; nextln: v61 = x86_pop.i64 -; nextln: v60 = x86_pop.i64 -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: return v10, v57, v58, v59, v60, v61 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif deleted file mode 100644 index 547e131fbd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ /dev/null @@ -1,250 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-windows legacy haswell - -; check the unwind information with a leaf function with no args -function %no_args_leaf() windows_fastcall { -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 4 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: 
unwind codes: 1 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 - -; check the unwind information with a non-leaf function with no args -function %no_args() windows_fastcall { - fn0 = %foo() -block0: - call fn0() - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 8 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 8 -; nextln: op: SmallStackAlloc -; nextln: info: 3 - -; check a function with medium-sized stack alloc -function %medium_stack() windows_fastcall { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 12500 (u16) - -; check a function with large-sized stack alloc -function %large_stack() windows_fastcall { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 1 -; nextln: value: 524288 (u32) - -function %fpr_with_function_call(i64, i64) windows_fastcall { - fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall; -block0(v0: i64, v1: i64): - v2 = load.f64 v0+0 - v3 = load.f64 v0+8 - v4 = load.i64 v0+16 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - call fn0(v2, v3, v4, v1, v1) - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; Only check the first unwind code here because this test specifically looks to -; see that in a function that is not a leaf, a callee-save FPR is stored in an -; area that does not overlap either the callee's shadow space or stack argument -; space. 
-; -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 22 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 4 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 13 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 23 (u16) -; nextln: -; nextln: offset: 22 -; nextln: op: SaveXmm128 -; nextln: info: 15 -; nextln: value: 10 (u16) - -; check a function that has CSRs -function %lots_of_registers(i64, i64) windows_fastcall { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 35 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 12 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 5 -; nextln: op: PushNonvolatileRegister -; nextln: info: 3 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 6 -; nextln: -; nextln: offset: 7 -; nextln: op: PushNonvolatileRegister -; nextln: info: 7 -; nextln: -; nextln: offset: 9 -; nextln: op: PushNonvolatileRegister -; nextln: info: 12 -; nextln: -; nextln: offset: 11 -; nextln: op: PushNonvolatileRegister -; nextln: info: 13 -; nextln: -; nextln: offset: 13 -; nextln: op: PushNonvolatileRegister -; nextln: info: 14 -; nextln: -; nextln: offset: 15 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 19 -; nextln: op: SmallStackAlloc -; nextln: info: 8 -; nextln: -; nextln: offset: 24 -; nextln: op: SaveXmm128 -; nextln: info: 6 -; nextln: value: 3 (u16) -; nextln: -; nextln: offset: 29 -; nextln: op: SaveXmm128 -; nextln: info: 7 -; nextln: value: 2 (u16) -; nextln: -; nextln: offset: 35 -; nextln: op: SaveXmm128 -; nextln: info: 8 -; nextln: value: 1 (u16) diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif deleted file mode 100644 index b58bf9bcb5..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif +++ /dev/null @@ -1,89 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits(i128) -> i128 { -block0(v0: i128): - v1 = bitrev.i128 v0 - return v1 -} - -; check: block0(v2: i64, v3: i64): -; check: v0 = iconcat v2, v3 -; check: v33 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v6 = band v2, v33 -; 
check: v7 = ushr_imm v6, 1 -; check: v34 = iconst.i64 0x5555_5555_5555_5555 -; check: v8 = band v2, v34 -; check: v9 = ishl_imm v8, 1 -; check: v10 = bor v7, v9 -; check: v35 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v11 = band v10, v35 -; check: v12 = ushr_imm v11, 2 -; check: v36 = iconst.i64 0x3333_3333_3333_3333 -; check: v13 = band v10, v36 -; check: v14 = ishl_imm v13, 2 -; check: v15 = bor v12, v14 -; check: v37 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v16 = band v15, v37 -; check: v17 = ushr_imm v16, 4 -; check: v38 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v18 = band v15, v38 -; check: v19 = ishl_imm v18, 4 -; check: v20 = bor v17, v19 -; check: v39 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v21 = band v20, v39 -; check: v22 = ushr_imm v21, 8 -; check: v40 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v23 = band v20, v40 -; check: v24 = ishl_imm v23, 8 -; check: v25 = bor v22, v24 -; check: v41 = iconst.i64 0xffff_0000_ffff_0000 -; check: v26 = band v25, v41 -; check: v27 = ushr_imm v26, 16 -; check: v42 = iconst.i64 0xffff_0000_ffff -; check: v28 = band v25, v42 -; check: v29 = ishl_imm v28, 16 -; check: v30 = bor v27, v29 -; check: v31 = ushr_imm v30, 32 -; check: v32 = ishl_imm v30, 32 -; check: v4 = bor v31, v32 -; check: v70 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v43 = band v3, v70 -; check: v44 = ushr_imm v43, 1 -; check: v71 = iconst.i64 0x5555_5555_5555_5555 -; check: v45 = band v3, v71 -; check: v46 = ishl_imm v45, 1 -; check: v47 = bor v44, v46 -; check: v72 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v48 = band v47, v72 -; check: v49 = ushr_imm v48, 2 -; check: v73 = iconst.i64 0x3333_3333_3333_3333 -; check: v50 = band v47, v73 -; check: v51 = ishl_imm v50, 2 -; check: v52 = bor v49, v51 -; check: v74 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v53 = band v52, v74 -; check: v54 = ushr_imm v53, 4 -; check: v75 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v55 = band v52, v75 -; check: v56 = ishl_imm v55, 4 -; check: v57 = bor v54, v56 -; check: v76 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v58 = band v57, v76 -; check: v59 = ushr_imm v58, 8 -; check: v77 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v60 = band v57, v77 -; check: v61 = ishl_imm v60, 8 -; check: v62 = bor v59, v61 -; check: v78 = iconst.i64 0xffff_0000_ffff_0000 -; check: v63 = band v62, v78 -; check: v64 = ushr_imm v63, 16 -; check: v79 = iconst.i64 0xffff_0000_ffff -; check: v65 = band v62, v79 -; check: v66 = ishl_imm v65, 16 -; check: v67 = bor v64, v66 -; check: v68 = ushr_imm v67, 32 -; check: v69 = ishl_imm v67, 32 -; check: v5 = bor v68, v69 -; check: v1 = iconcat v5, v4 -; check: return v5, v4 diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif deleted file mode 100644 index 6c9ead0fe2..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev.clif +++ /dev/null @@ -1,206 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits_8(i8) -> i8 { -block0(v0: i8): - v1 = bitrev.i8 v0 - return v1 -} -; check: v16 = uextend.i32 v0 -; check: v17 = band_imm v16, 170 -; check: v2 = ireduce.i8 v17 -; check: v18 = uextend.i32 v2 -; check: v19 = ushr_imm v18, 1 -; check: v3 = ireduce.i8 v19 -; check: v20 = uextend.i32 v0 -; check: v21 = band_imm v20, 85 -; check: v4 = ireduce.i8 v21 -; check: v22 = uextend.i32 v4 -; check: v23 = ishl_imm v22, 1 -; check: v5 = ireduce.i8 v23 -; check: v24 = uextend.i32 v3 -; check: v25 = uextend.i32 v5 -; check: v26 = bor v24, v25 -; check: v6 = ireduce.i8 v26 -; check: 
v27 = uextend.i32 v6 -; check: v28 = band_imm v27, 204 -; check: v7 = ireduce.i8 v28 -; check: v29 = uextend.i32 v7 -; check: v30 = ushr_imm v29, 2 -; check: v8 = ireduce.i8 v30 -; check: v31 = uextend.i32 v6 -; check: v32 = band_imm v31, 51 -; check: v9 = ireduce.i8 v32 -; check: v33 = uextend.i32 v9 -; check: v34 = ishl_imm v33, 2 -; check: v10 = ireduce.i8 v34 -; check: v35 = uextend.i32 v8 -; check: v36 = uextend.i32 v10 -; check: v37 = bor v35, v36 -; check: v11 = ireduce.i8 v37 -; check: v38 = uextend.i32 v11 -; check: v39 = band_imm v38, 240 -; check: v12 = ireduce.i8 v39 -; check: v40 = uextend.i32 v12 -; check: v41 = ushr_imm v40, 4 -; check: v13 = ireduce.i8 v41 -; check: v42 = uextend.i32 v11 -; check: v43 = band_imm v42, 15 -; check: v14 = ireduce.i8 v43 -; check: v44 = uextend.i32 v14 -; check: v45 = ishl_imm v44, 4 -; check: v15 = ireduce.i8 v45 -; check: v46 = uextend.i32 v13 -; check: v47 = uextend.i32 v15 -; check: v48 = bor v46, v47 -; check: v1 = ireduce.i8 v48 -; check: return v1 - -function %reverse_bits_16(i16) -> i16 { -block0(v0: i16): - v1 = bitrev.i16 v0 - return v1 -} -; check: v21 = uextend.i32 v0 -; check: v22 = band_imm v21, 0xaaaa -; check: v2 = ireduce.i16 v22 -; check: v23 = uextend.i32 v2 -; check: v24 = ushr_imm v23, 1 -; check: v3 = ireduce.i16 v24 -; check: v25 = uextend.i32 v0 -; check: v26 = band_imm v25, 0x5555 -; check: v4 = ireduce.i16 v26 -; check: v27 = uextend.i32 v4 -; check: v28 = ishl_imm v27, 1 -; check: v5 = ireduce.i16 v28 -; check: v29 = uextend.i32 v3 -; check: v30 = uextend.i32 v5 -; check: v31 = bor v29, v30 -; check: v6 = ireduce.i16 v31 -; check: v32 = uextend.i32 v6 -; check: v33 = band_imm v32, 0xcccc -; check: v7 = ireduce.i16 v33 -; check: v34 = uextend.i32 v7 -; check: v35 = ushr_imm v34, 2 -; check: v8 = ireduce.i16 v35 -; check: v36 = uextend.i32 v6 -; check: v37 = band_imm v36, 0x3333 -; check: v9 = ireduce.i16 v37 -; check: v38 = uextend.i32 v9 -; check: v39 = ishl_imm v38, 2 -; check: v10 = ireduce.i16 v39 -; check: v40 = uextend.i32 v8 -; check: v41 = uextend.i32 v10 -; check: v42 = bor v40, v41 -; check: v11 = ireduce.i16 v42 -; check: v43 = uextend.i32 v11 -; check: v44 = band_imm v43, 0xf0f0 -; check: v12 = ireduce.i16 v44 -; check: v45 = uextend.i32 v12 -; check: v46 = ushr_imm v45, 4 -; check: v13 = ireduce.i16 v46 -; check: v47 = uextend.i32 v11 -; check: v48 = band_imm v47, 3855 -; check: v14 = ireduce.i16 v48 -; check: v49 = uextend.i32 v14 -; check: v50 = ishl_imm v49, 4 -; check: v15 = ireduce.i16 v50 -; check: v51 = uextend.i32 v13 -; check: v52 = uextend.i32 v15 -; check: v53 = bor v51, v52 -; check: v16 = ireduce.i16 v53 -; check: v54 = uextend.i32 v16 -; check: v55 = band_imm v54, 0xff00 -; check: v17 = ireduce.i16 v55 -; check: v56 = uextend.i32 v17 -; check: v57 = ushr_imm v56, 8 -; check: v18 = ireduce.i16 v57 -; check: v58 = uextend.i32 v16 -; check: v59 = band_imm v58, 255 -; check: v19 = ireduce.i16 v59 -; check: v60 = uextend.i32 v19 -; check: v61 = ishl_imm v60, 8 -; check: v20 = ireduce.i16 v61 -; check: v62 = uextend.i32 v18 -; check: v63 = uextend.i32 v20 -; check: v64 = bor v62, v63 -; check: v1 = ireduce.i16 v64 -; check: return v1 - -function %reverse_bits_32(i32) -> i32 { -block0(v0: i32): - v1 = bitrev.i32 v0 - return v1 -} -; check: v24 = iconst.i32 0xaaaa_aaaa -; check: v2 = band v0, v24 -; check: v3 = ushr_imm v2, 1 -; check: v4 = band_imm v0, 0x5555_5555 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v25 = iconst.i32 0xcccc_cccc -; check: v7 = band v6, v25 -; check: v8 = 
ushr_imm v7, 2 -; check: v9 = band_imm v6, 0x3333_3333 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v26 = iconst.i32 0xf0f0_f0f0 -; check: v12 = band v11, v26 -; check: v13 = ushr_imm v12, 4 -; check: v14 = band_imm v11, 0x0f0f_0f0f -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v27 = iconst.i32 0xff00_ff00 -; check: v17 = band v16, v27 -; check: v18 = ushr_imm v17, 8 -; check: v19 = band_imm v16, 0x00ff_00ff -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v22 = ushr_imm v21, 16 -; check: v23 = ishl_imm v21, 16 -; check: v1 = bor v22, v23 - - -function %reverse_bits_64(i64) -> i64 { -block0(v0: i64): - v1 = bitrev.i64 v0 - return v1 -} -; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v2 = band v0, v29 -; check: v3 = ushr_imm v2, 1 -; check: v30 = iconst.i64 0x5555_5555_5555_5555 -; check: v4 = band v0, v30 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v7 = band v6, v31 -; check: v8 = ushr_imm v7, 2 -; check: v32 = iconst.i64 0x3333_3333_3333_3333 -; check: v9 = band v6, v32 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v12 = band v11, v33 -; check: v13 = ushr_imm v12, 4 -; check: v34 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v14 = band v11, v34 -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v17 = band v16, v35 -; check: v18 = ushr_imm v17, 8 -; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v19 = band v16, v36 -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v37 = iconst.i64 0xffff_0000_ffff_0000 -; check: v22 = band v21, v37 -; check: v23 = ushr_imm v22, 16 -; check: v38 = iconst.i64 0xffff_0000_ffff -; check: v24 = band v21, v38 -; check: v25 = ishl_imm v24, 16 -; check: v26 = bor v23, v25 -; check: v27 = ushr_imm v26, 32 -; check: v28 = ishl_imm v26, 32 -; check: v1 = bor v27, v28 diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif deleted file mode 100644 index db464ae4d4..0000000000 --- a/cranelift/filetests/filetests/legalizer/br_table_cond.clif +++ /dev/null @@ -1,64 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -; Test that when jump_tables_enables is false, all jump tables are eliminated. 
-; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64 vmctx) baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - jt0 = jump_table [block2, block2, block7] - jt1 = jump_table [block8, block8] - -block0(v0: i64): - jump block5 - -block5: - v1 = global_value.i64 gv1 - v2 = load.i64 v1 - trapnz v2, interrupt - v3 = iconst.i32 0 - br_table v3, block3, jt0 -; check: block5: -; check: $(val0=$V) = iconst.i32 0 -; nextln: $(cmp0=$V) = icmp_imm eq $val0, 0 -; nextln: brnz $cmp0, block2 -; nextln: jump $(fail0=$BB) -; check: $fail0: -; nextln: $(cmp1=$V) = icmp_imm.i32 eq $val0, 1 -; nextln: brnz $cmp1, block2 -; nextln: jump $(fail1=$BB) -; check: $fail1: -; nextln: $(cmp2=$V) = icmp_imm.i32 eq $val0, 2 -; nextln: brnz $cmp2, block7 -; nextln: jump block3 - -block7: - v4 = iconst.i32 0 - br_table v4, block3, jt1 -; check: block7: -; check: $(val1=$V) = iconst.i32 0 -; nextln: $(cmp3=$V) = icmp_imm eq $val1, 0 -; nextln: brnz $cmp3, block8 -; nextln: jump $(fail3=$BB) -; check: $fail3: -; nextln: $(cmp4=$V) = icmp_imm.i32 eq $val1, 1 -; nextln: brnz $cmp4, block8 -; nextln: jump block3 - -block8: - jump block5 - -block3: - jump block2 - -block2: - jump block1 - -block1: - fallthrough_return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif deleted file mode 100644 index d320155470..0000000000 --- a/cranelift/filetests/filetests/legalizer/empty_br_table.clif +++ /dev/null @@ -1,17 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -function u0:0(i64) { - jt0 = jump_table [] - -block0(v0: i64): - br_table v0, block1, jt0 -; check: block0(v0: i64): -; nextln: jump block1 - -block1: - return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif deleted file mode 100644 index 6d72cc6499..0000000000 --- a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif +++ /dev/null @@ -1,23 +0,0 @@ -test legalizer -target x86_64 legacy - -function %icmp_imm_i128(i128) -> i8 { -block0(v0: i128): - v1 = icmp_imm.i128 eq v0, 1 - v2 = bint.i8 v1 - return v2 -} - -; check: function %icmp_imm_i128(i64 [%rdi], i64 [%rsi]) -> i8 [%rax] fast { -; nextln: block0(v3: i64, v4: i64): -; nextln: v7 -> v3 -; nextln: v8 -> v4 -; nextln: [-] v0 = iconcat v3, v4 -; nextln: [RexOp1pu_id#b8] v5 = iconst.i64 1 -; nextln: [RexOp1pu_id#b8] v6 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v9 = icmp eq v7, v5 -; nextln: [RexOp1icscc#8039] v10 = icmp eq v8, v6 -; nextln: [RexOp1rr#21] v1 = band v9, v10 -; nextln: [RexOp2urm_noflags#4b6] v2 = bint.i8 v1 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif deleted file mode 100644 index 141330cf01..0000000000 --- a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -target x86_64 legacy - -function %legalize_entry(i128) -> i64 windows_fastcall { -block0(v0: i128): - v1, v2 = isplit v0 - return v2 -} -; check: function %legalize_entry(i64 ptr [%rcx]) -> i64 [%rax] windows_fastcall { -; nextln: block0(v3: i64): -; nextln: v4 = load.i64 v3 -; nextln: v1 -> v4 -; nextln: v5 = load.i64 v3+8 -; nextln: v2 -> v5 -; nextln: v0 = iconcat v4, v5 -; nextln: return v2 - -function %legalize_call() { - fn0 = %foo(i32x4) windows_fastcall -block0: - v0 = 
vconst.i32x4 [1 2 3 4] - call fn0(v0) - return -} -; check: ss0 = explicit_slot 16 -; check: sig0 = (i64 ptr [%rcx]) windows_fastcall -; check: v0 = vconst.i32x4 const0 -; nextln: v1 = stack_addr.i64 ss0 -; nextln: store v0, v1 -; nextln: v2 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v2(v1) diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif deleted file mode 100644 index 8976ad0e25..0000000000 --- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif +++ /dev/null @@ -1,21 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -function %foo() -> i128 { -block0: - v1 = iconst.i64 0x6400000042 - v2 = iconst.i64 0x7F10100042 - v3 = iconcat v1, v2 - v4 = popcnt.i128 v3 - return v4 -} - -; check: v1 = iconst.i64 0x0064_0000_0042 -; check: v2 = iconst.i64 0x007f_1010_0042 -; check: v3 = iconcat v1, v2 -; check: v5 = popcnt v1 -; check: v6 = popcnt v2 -; check: v7 = iadd v5, v6 -; check: v8 = iconst.i64 0 -; check: v4 = iconcat v7, v8 -; check: return v7, v8 diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif index bb21ec2553..a7c059f6c0 100644 --- a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif +++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif @@ -7,12 +7,15 @@ target x86_64 ;; we need to make an alias `v3 -> v2`. function %replace_inst_with_alias() -> i32 { + sig0 = (i32, i32) -> i32, i32 + fn0 = u0:0 sig0 + block0: v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 + v1, v2 = call fn0(v0, v0) v3 = isub v2, v0 ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 + ; nextln: v1, v2 = call fn0(v0, v0) ; nextln: v3 -> v2 return v3 } diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif deleted file mode 100644 index 55a8d03738..0000000000 --- a/cranelift/filetests/filetests/postopt/basic.clif +++ /dev/null @@ -1,125 +0,0 @@ -test postopt -target aarch64 -target i686 legacy - -; Test that compare+branch sequences are folded effectively on x86. - -function %br_icmp(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#85] brnz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif slt v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use brz instead of brnz, so the condition is inverted. 
- -function %br_icmp_inverse(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_inverse -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use icmp_imm instead of icmp. - -function %br_icmp_imm(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc_ib#7083] v2 = icmp_imm slt v0, 2 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_imm -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp_imm v0, 2 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use fcmp instead of icmp. - -function %br_fcmp(f32, f32) -> f32 { -block0(v0: f32, v1: f32): -[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000 -[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 -[Op1ret#c3] return v8 -} -; sameln: function %br_fcmp -; nextln: block0(v0: f32, v1: f32): -; nextln: v19 = ffcmp v0, v1 -; nextln: v2 = trueff gt v19 -; nextln: brff ule v19, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v18 = iconst.i32 0x40a8_0000 -; nextln: v8 = bitcast.f32 v18 -; nextln: return v8 -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif deleted file mode 100644 index acedb71087..0000000000 --- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif +++ /dev/null @@ -1,94 +0,0 @@ -test postopt -target x86_64 legacy - -function %dual_loads(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3 - v5 = uload8.i64 v3 - v6 = sload8.i64 v3 - v7 = uload16.i64 v3 - v8 = sload16.i64 v3 - v9 = uload32.i64 v3 - v10 = sload32.i64 v3 -[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1 -; nextln: v5 = uload8_complex.i64 v0+v1 -; nextln: v6 = sload8_complex.i64 v0+v1 -; nextln: v7 = uload16_complex.i64 v0+v1 -; nextln: v8 = sload16_complex.i64 v0+v1 -; nextln: v9 = uload32_complex v0+v1 -; nextln: v10 = sload32_complex v0+v1 -; nextln: return v10 -; nextln: } - -function %dual_loads2(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3+1 - v5 = uload8.i64 v3+1 - v6 = sload8.i64 v3+1 - v7 = uload16.i64 v3+1 - v8 = sload16.i64 v3+1 - v9 = uload32.i64 v3+1 - v10 = sload32.i64 v3+1 -[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads2 -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1+1 -; nextln: v5 = uload8_complex.i64 v0+v1+1 -; nextln: v6 = sload8_complex.i64 
v0+v1+1 -; nextln: v7 = uload16_complex.i64 v0+v1+1 -; nextln: v8 = sload16_complex.i64 v0+v1+1 -; nextln: v9 = uload32_complex v0+v1+1 -; nextln: v10 = sload32_complex v0+v1+1 -; nextln: return v10 -; nextln: } - -function %dual_stores(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1st#8089] store.i64 v2, v3 -[RexOp1st#88] istore8.i64 v2, v3 -[RexMp1st#189] istore16.i64 v2, v3 -[RexOp1st#89] istore32.i64 v2, v3 -[Op1ret#c3] return -} - -; sameln: function %dual_stores -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1 -; nextln: istore8_complex v2, v0+v1 -; nextln: istore16_complex v2, v0+v1 -; nextln: istore32_complex v2, v0+v1 -; nextln: return -; nextln: } - -function %dual_stores2(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1stDisp8#8089] store.i64 v2, v3+1 -[RexOp1stDisp8#88] istore8.i64 v2, v3+1 -[RexMp1stDisp8#189] istore16.i64 v2, v3+1 -[RexOp1stDisp8#89] istore32.i64 v2, v3+1 -[Op1ret#c3] return -} - -; sameln: function %dual_stores2 -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1+1 -; nextln: istore8_complex v2, v0+v1+1 -; nextln: istore16_complex v2, v0+v1+1 -; nextln: istore32_complex v2, v0+v1+1 -; nextln: return -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif deleted file mode 100644 index 84ddf3b884..0000000000 --- a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif +++ /dev/null @@ -1,32 +0,0 @@ -test postopt -target x86_64 legacy - -; Fold the immediate of an iadd_imm into an address offset. 
- -function u0:0(i64 vmctx) -> i64 { -block0(v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v1 -[Op1ret#c3] return v2 -} - -; sameln: function u0:0(i64 vmctx) -> i64 fast { -; nextln: block0(v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v0+16 -; nextln: [Op1ret#c3] return v2 -; nextln: } - -function u0:1(i64, i64 vmctx) { -block0(v3: i64, v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1stDisp8#8089] store.i64 notrap aligned v3, v1 -[Op1ret#c3] return -} - -; sameln: function u0:1(i64, i64 vmctx) fast { -; nextln: block0(v3: i64, v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1stDisp8#8089] store notrap aligned v3, v0+16 -; nextln: [Op1ret#c3] return -; nextln: } diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif deleted file mode 100644 index e3dcfbad90..0000000000 --- a/cranelift/filetests/filetests/regalloc/aliases.clif +++ /dev/null @@ -1,35 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { - gv0 = vmctx - heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v1: f32, v2: i64): - v3 = iconst.i32 0 - jump block3(v3) - -block3(v4: i32): - v5 = heap_addr.i64 heap0, v4, 1 - v6 = load.f32 v5 - v7 -> v1 - v8 = fdiv v6, v7 - v9 = heap_addr.i64 heap0, v4, 1 - store v8, v9 - v10 = iconst.i32 4 - v11 = iadd v4, v10 - v12 -> v0 - v13 = icmp ult v11, v12 - v14 = bint.i32 v13 - brnz v14, block3(v11) - jump block4 - -block4: - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/basic.clif b/cranelift/filetests/filetests/regalloc/basic.clif deleted file mode 100644 index 48111253ae..0000000000 --- a/cranelift/filetests/filetests/regalloc/basic.clif +++ /dev/null @@ -1,80 +0,0 @@ -test regalloc - -; We can add more ISAs once they have defined encodings. -target riscv32 - -; regex: RX=%x\d+ - -function %add(i32, i32) { -block0(v1: i32, v2: i32): - v3 = iadd v1, v2 -; check: [R#0c,%x5] -; sameln: iadd - return -} - -; Function with a dead argument. -function %dead_arg(i32, i32) -> i32{ -block0(v1: i32, v2: i32): -; not: regmove -; check: return v1 - return v1 -} - -; Return a value from a different register. -function %move1(i32, i32) -> i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> %x10 -; nextln: return v2 - return v2 -} - -; Swap two registers. -function %swap(i32, i32) -> i32, i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> $(tmp=$RX) -; nextln: regmove v1, %x10 -> %x11 -; nextln: regmove v2, $tmp -> %x10 -; nextln: return v2, v1 - return v2, v1 -} - -; Return a block argument. -function %retblock(i32, i32) -> i32 { -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - return v10 -} - -; Pass a block argument as a function argument. -function %callblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - v11 = call fn0(v10) - return v11 -} - -; Pass a block argument as a jump argument. 
-function %jumpblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1, v2) - jump block1(v2, v1) - -block1(v10: i32, v11: i32): - jump block2(v10, v11) - -block2(v20: i32, v21: i32): - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif deleted file mode 100644 index 48395da1b3..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalesce.clif +++ /dev/null @@ -1,157 +0,0 @@ -test regalloc -target riscv32 - -; Test the coalescer. -; regex: V=v\d+ -; regex: WS=\s+ -; regex: LOC=%\w+ -; regex: BB=block\d+ - -; This function is already CSSA, so no copies should be inserted. -function %cssa(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; v0 is used by the branch and passed as an arg - that's no conflict. - brnz v0, block1(v0) - jump block2 - -block2: - ; v0 is live across the branch above. That's no conflict. - v1 = iadd_imm v0, 7 - jump block1(v1) - -block1(v10: i32): - v11 = iadd_imm v10, 7 - return v11 -} - -function %trivial(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0) - jump block2 - -block2: - ; not: copy - v1 = iadd_imm v0, 7 - jump block1(v1) - - ; check: $splitEdge: - ; nextln: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1) - -block1(v10: i32): - ; Use v0 in the destination block causes a conflict. - v11 = iadd v10, v0 - return v11 -} - -; A value is used as an SSA argument twice in the same branch. -function %dualuse(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0, v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - v2 = iadd_imm v1, 56 - jump block1(v1, v2) - - ; check: $splitEdge: - ; check: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1, v0) - -block1(v10: i32, v11: i32): - v12 = iadd v10, v11 - return v12 -} - -; Interference away from the branch -; The interference can be broken with a copy at either branch. -function %interference(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; check: brnz v0, $(splitEdge=$BB) - ; not: copy - brnz v0, block1(v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - ; v1 and v0 interfere here: - v2 = iadd_imm v0, 8 - ; check: $(cp0=$V) = copy v1 - ; check: jump block1($cp0) - jump block1(v1) - - ; check: $splitEdge: - ; not: copy - ; nextln: jump block1(v0) - -block1(v10: i32): - ; not: copy - v11 = iadd_imm v10, 7 - return v11 -} - -; A loop where one induction variable is used as a backedge argument. -function %fibonacci(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 1 - v2 = iconst.i32 2 - jump block1(v1, v2) - - ; check: $(splitEdge=$BB): - ; check: $(nv11b=$V) = copy.i32 v11 - ; not: copy - ; check: jump block1($nv11b, v12) - -block1(v10: i32, v11: i32): - ; v11 needs to be isolated because it interferes with v10. - ; check: block1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC]) - ; check: v11 = copy $nv11a - v12 = iadd v10, v11 - v13 = icmp ult v12, v0 - ; check: brnz v13, $splitEdge - brnz v13, block1(v11, v12) - jump block2 - -block2: - return v12 -} - -; Function arguments passed on the stack aren't allowed to be part of a virtual -; register, at least for now. This is because the other values in the virtual -; register would need to be spilled to the incoming_arg stack slot which we treat -; as belonging to the caller. 
-function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): - ; check: fill v8 - ; not: v8 - jump block1(v8) - -block1(v10: i32): - v11 = iadd_imm v10, 1 - return v11 -} - -function %gvn_unremovable_phi(i32) system_v { -block0(v0: i32): - v2 = iconst.i32 0 - jump block2(v2, v0) - -block2(v3: i32, v4: i32): - brnz v3, block2(v3, v4) - jump block3 - -block3: - v5 = iconst.i32 1 - brnz v3, block2(v2, v5) - jump block4 - -block4: - return -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif deleted file mode 100644 index c549cbd3d2..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-207.clif +++ /dev/null @@ -1,1527 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/207 -; -; The coalescer creates a virtual register with two interfering values. -function %pr207(i64 vmctx, i32, i32) -> i32 system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx, i32, i32) -> i32 system_v - sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v - sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v - fn0 = u0:2 sig0 - fn1 = u0:0 sig1 - fn2 = u0:1 sig2 - -block0(v0: i64, v1: i32, v2: i32): - v3 = iconst.i32 0 - v4 = iconst.i32 0 - v5 = iconst.i32 0 - v6 = iconst.i32 0x4ffe - v7 = icmp uge v5, v6 - brz v7, block1 - jump block100 - -block100: - trap heap_oob - -block1: - v8 = uextend.i64 v5 - v9 = iadd_imm.i64 v0, -8 - v10 = load.i64 v9 - v11 = iadd v10, v8 - v12 = load.i32 v11+4 - v13 = iconst.i32 1056 - v14 = isub v12, v13 - v15 = iconst.i32 0x4ffe - v16 = icmp.i32 uge v4, v15 - brz v16, block2 - jump block101 - -block101: - trap heap_oob - -block2: - v17 = uextend.i64 v4 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v20 = iadd v19, v17 - store.i32 v14, v20+4 - v21 = iconst.i32 0x4ffe - v22 = icmp.i32 uge v2, v21 - brz v22, block3 - jump block102 - -block102: - trap heap_oob - -block3: - v23 = uextend.i64 v2 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v26 = iadd v25, v23 - v27 = sload8.i32 v26 - v28 = iconst.i32 255 - v29 = band v27, v28 - v30 = iconst.i32 0 - v31 = icmp eq v29, v30 - v32 = bint.i32 v31 - brnz v32, block90(v14, v1) - jump block103 - -block103: - v33 = call fn0(v0, v1, v27) - v34 = iconst.i32 0 - v35 = iconst.i32 0 - v36 = icmp eq v33, v35 - v37 = bint.i32 v36 - brnz v37, block90(v14, v34) - jump block104 - -block104: - v38 = iconst.i32 0x4ffe - v39 = icmp.i32 uge v2, v38 - brz v39, block4 - jump block105 - -block105: - trap heap_oob - -block4: - v40 = uextend.i64 v2 - v41 = iadd_imm.i64 v0, -8 - v42 = load.i64 v41 - v43 = iadd v42, v40 - v44 = uload8.i32 v43+1 - v45 = iconst.i32 0 - v46 = icmp eq v44, v45 - v47 = bint.i32 v46 - brnz v47, block56(v33, v14) - jump block106 - -block106: - v48 = iconst.i32 0x4ffe - v49 = icmp.i32 uge v33, v48 - brz v49, block5 - jump block107 - -block107: - trap heap_oob - -block5: - v50 = uextend.i64 v33 - v51 = iadd_imm.i64 v0, -8 - v52 = load.i64 v51 - v53 = iadd v52, v50 - v54 = uload8.i32 v53+1 - v55 = iconst.i32 0 - v56 = icmp eq v54, v55 - v57 = bint.i32 v56 - brnz v57, block90(v14, v34) - jump block108 - -block108: - v58 = iconst.i32 0x4ffe - v59 = icmp.i32 uge v2, v58 - brz v59, block6 - jump block109 - -block109: - trap heap_oob - -block6: - v60 = 
uextend.i64 v2 - v61 = iadd_imm.i64 v0, -8 - v62 = load.i64 v61 - v63 = iadd v62, v60 - v64 = uload8.i32 v63+2 - v65 = iconst.i32 0 - v66 = icmp eq v64, v65 - v67 = bint.i32 v66 - brnz v67, block42 - jump block110 - -block110: - v68 = iconst.i32 0x4ffe - v69 = icmp.i32 uge v33, v68 - brz v69, block7 - jump block111 - -block111: - trap heap_oob - -block7: - v70 = uextend.i64 v33 - v71 = iadd_imm.i64 v0, -8 - v72 = load.i64 v71 - v73 = iadd v72, v70 - v74 = uload8.i32 v73+2 - v75 = iconst.i32 0 - v76 = icmp eq v74, v75 - v77 = bint.i32 v76 - brnz v77, block90(v14, v34) - jump block112 - -block112: - v78 = iconst.i32 0x4ffe - v79 = icmp.i32 uge v2, v78 - brz v79, block8 - jump block113 - -block113: - trap heap_oob - -block8: - v80 = uextend.i64 v2 - v81 = iadd_imm.i64 v0, -8 - v82 = load.i64 v81 - v83 = iadd v82, v80 - v84 = uload8.i32 v83+3 - v85 = iconst.i32 0 - v86 = icmp eq v84, v85 - v87 = bint.i32 v86 - brnz v87, block46 - jump block114 - -block114: - v88 = iconst.i32 0x4ffe - v89 = icmp.i32 uge v33, v88 - brz v89, block9 - jump block115 - -block115: - trap heap_oob - -block9: - v90 = uextend.i64 v33 - v91 = iadd_imm.i64 v0, -8 - v92 = load.i64 v91 - v93 = iadd v92, v90 - v94 = uload8.i32 v93+3 - v95 = iconst.i32 0 - v96 = icmp eq v94, v95 - v97 = bint.i32 v96 - brnz v97, block90(v14, v34) - jump block116 - -block116: - v98 = iconst.i32 0x4ffe - v99 = icmp.i32 uge v2, v98 - brz v99, block10 - jump block117 - -block117: - trap heap_oob - -block10: - v100 = uextend.i64 v2 - v101 = iadd_imm.i64 v0, -8 - v102 = load.i64 v101 - v103 = iadd v102, v100 - v104 = uload8.i32 v103+4 - v105 = iconst.i32 0 - v106 = icmp eq v104, v105 - v107 = bint.i32 v106 - brnz v107, block54 - jump block118 - -block118: - v108 = iconst.i32 1 - v109 = iadd.i32 v2, v108 - v110 = iconst.i32 1048 - v111 = iadd.i32 v14, v110 - v112 = iconst.i64 0 - v113 = iconst.i32 0x4ffe - v114 = icmp uge v111, v113 - brz v114, block11 - jump block119 - -block119: - trap heap_oob - -block11: - v115 = uextend.i64 v111 - v116 = iadd_imm.i64 v0, -8 - v117 = load.i64 v116 - v118 = iadd v117, v115 - store.i64 v112, v118 - v119 = iconst.i32 1040 - v120 = iadd.i32 v14, v119 - v121 = iconst.i64 0 - v122 = iconst.i32 0x4ffe - v123 = icmp uge v120, v122 - brz v123, block12 - jump block120 - -block120: - trap heap_oob - -block12: - v124 = uextend.i64 v120 - v125 = iadd_imm.i64 v0, -8 - v126 = load.i64 v125 - v127 = iadd v126, v124 - store.i64 v121, v127 - v128 = iconst.i64 0 - v129 = iconst.i32 0x4ffe - v130 = icmp.i32 uge v14, v129 - brz v130, block13 - jump block121 - -block121: - trap heap_oob - -block13: - v131 = uextend.i64 v14 - v132 = iadd_imm.i64 v0, -8 - v133 = load.i64 v132 - v134 = iadd v133, v131 - store.i64 v128, v134+1032 - v135 = iconst.i64 0 - v136 = iconst.i32 0x4ffe - v137 = icmp.i32 uge v14, v136 - brz v137, block14 - jump block122 - -block122: - trap heap_oob - -block14: - v138 = uextend.i64 v14 - v139 = iadd_imm.i64 v0, -8 - v140 = load.i64 v139 - v141 = iadd v140, v138 - store.i64 v135, v141+1024 - v142 = iconst.i32 -1 - jump block15(v142, v27) - -block15(v143: i32, v144: i32): - v145 = iadd.i32 v33, v143 - v146 = iconst.i32 1 - v147 = iadd v145, v146 - v148 = iconst.i32 0x4ffe - v149 = icmp uge v147, v148 - brz v149, block16 - jump block123 - -block123: - trap heap_oob - -block16: - v150 = uextend.i64 v147 - v151 = iadd_imm.i64 v0, -8 - v152 = load.i64 v151 - v153 = iadd v152, v150 - v154 = uload8.i32 v153 - v155 = iconst.i32 0 - v156 = icmp eq v154, v155 - v157 = bint.i32 v156 - brnz v157, block89(v14) - jump block124 - 
-block124: - v158 = iconst.i32 255 - v159 = band.i32 v144, v158 - v160 = iconst.i32 2 - v161 = ishl v159, v160 - v162 = iadd.i32 v14, v161 - v163 = iconst.i32 2 - v164 = iadd.i32 v143, v163 - v165 = iconst.i32 0x4ffe - v166 = icmp uge v162, v165 - brz v166, block17 - jump block125 - -block125: - trap heap_oob - -block17: - v167 = uextend.i64 v162 - v168 = iadd_imm.i64 v0, -8 - v169 = load.i64 v168 - v170 = iadd v169, v167 - store.i32 v164, v170 - v171 = iconst.i32 1024 - v172 = iadd.i32 v14, v171 - v173 = iconst.i32 3 - v174 = ushr.i32 v159, v173 - v175 = iconst.i32 28 - v176 = band v174, v175 - v177 = iadd v172, v176 - v178 = iconst.i32 0x4ffe - v179 = icmp uge v177, v178 - brz v179, block18 - jump block126 - -block126: - trap heap_oob - -block18: - v180 = uextend.i64 v177 - v181 = iadd_imm.i64 v0, -8 - v182 = load.i64 v181 - v183 = iadd v182, v180 - v184 = load.i32 v183 - v185 = iconst.i32 1 - v186 = iconst.i32 31 - v187 = band.i32 v144, v186 - v188 = ishl v185, v187 - v189 = bor v184, v188 - v190 = iconst.i32 0x4ffe - v191 = icmp.i32 uge v177, v190 - brz v191, block19 - jump block127 - -block127: - trap heap_oob - -block19: - v192 = uextend.i64 v177 - v193 = iadd_imm.i64 v0, -8 - v194 = load.i64 v193 - v195 = iadd v194, v192 - store.i32 v189, v195 - v196 = iadd.i32 v109, v143 - v197 = iconst.i32 1 - v198 = iadd.i32 v143, v197 - v199 = iconst.i32 1 - v200 = iadd v196, v199 - v201 = iconst.i32 0x4ffe - v202 = icmp uge v200, v201 - brz v202, block20 - jump block128 - -block128: - trap heap_oob - -block20: - v203 = uextend.i64 v200 - v204 = iadd_imm.i64 v0, -8 - v205 = load.i64 v204 - v206 = iadd v205, v203 - v207 = uload8.i32 v206 - brnz v207, block15(v198, v207) - jump block21 - -block21: - v208 = iconst.i32 -1 - v209 = iconst.i32 1 - v210 = iconst.i32 -1 - v211 = iconst.i32 1 - v212 = iconst.i32 1 - v213 = iadd.i32 v198, v212 - v214 = iconst.i32 2 - v215 = icmp ult v213, v214 - v216 = bint.i32 v215 - brnz v216, block38(v2, v211, v209, v210, v208, v198, v213, v33, v14) - jump block129 - -block129: - v217 = iconst.i32 -1 - v218 = iconst.i32 0 - v219 = iconst.i32 1 - v220 = iconst.i32 1 - v221 = iconst.i32 1 - v222 = copy.i32 v44 - jump block22(v217, v221, v44, v220, v218, v219, v213, v222, v198, v33, v14) - -block22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i32, v230: i32, v231: i32, v232: i32, v233: i32): - v234 = copy v228 - v235 = iadd v223, v224 - v236 = iadd.i32 v2, v235 - v237 = iconst.i32 0x4ffe - v238 = icmp uge v236, v237 - brz v238, block23 - jump block130 - -block130: - trap heap_oob - -block23: - v239 = uextend.i64 v236 - v240 = iadd_imm.i64 v0, -8 - v241 = load.i64 v240 - v242 = iadd v241, v239 - v243 = uload8.i32 v242 - v244 = iconst.i32 255 - v245 = band.i32 v225, v244 - v246 = icmp ne v243, v245 - v247 = bint.i32 v246 - brnz v247, block24 - jump block131 - -block131: - v248 = icmp.i32 ne v224, v226 - v249 = bint.i32 v248 - brnz v249, block25 - jump block132 - -block132: - v250 = iadd.i32 v227, v226 - v251 = iconst.i32 1 - jump block27(v251, v250, v223, v226) - -block24: - v252 = icmp.i32 ule v243, v245 - v253 = bint.i32 v252 - brnz v253, block26 - jump block133 - -block133: - v254 = isub.i32 v234, v223 - v255 = iconst.i32 1 - jump block27(v255, v234, v223, v254) - -block25: - v256 = iconst.i32 1 - v257 = iadd.i32 v224, v256 - v258 = copy.i32 v227 - jump block27(v257, v258, v223, v226) - -block26: - v259 = iconst.i32 1 - v260 = iconst.i32 1 - v261 = iadd.i32 v227, v260 - v262 = iconst.i32 1 - v263 = copy.i32 v227 - jump block27(v259, v261, v263, 
v262) - -block27(v264: i32, v265: i32, v266: i32, v267: i32): - v268 = iadd v264, v265 - v269 = icmp uge v268, v229 - v270 = bint.i32 v269 - brnz v270, block29 - jump block134 - -block134: - v271 = iadd.i32 v2, v268 - v272 = iconst.i32 0x4ffe - v273 = icmp uge v271, v272 - brz v273, block28 - jump block135 - -block135: - trap heap_oob - -block28: - v274 = uextend.i64 v271 - v275 = iadd_imm.i64 v0, -8 - v276 = load.i64 v275 - v277 = iadd v276, v274 - v278 = uload8.i32 v277 - v279 = copy.i32 v265 - jump block22(v266, v264, v278, v267, v279, v268, v229, v230, v231, v232, v233) - -block29: - jump block30 - -block30: - v280 = iconst.i32 -1 - v281 = iconst.i32 0 - v282 = iconst.i32 1 - v283 = iconst.i32 1 - v284 = iconst.i32 1 - jump block31(v280, v284, v230, v283, v281, v282, v229, v267, v266, v231, v232, v233) - -block31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i32, v292: i32, v293: i32, v294: i32, v295: i32, v296: i32): - v297 = copy v290 - v298 = iadd v285, v286 - v299 = iadd.i32 v2, v298 - v300 = iconst.i32 0x4ffe - v301 = icmp uge v299, v300 - brz v301, block32 - jump block136 - -block136: - trap heap_oob - -block32: - v302 = uextend.i64 v299 - v303 = iadd_imm.i64 v0, -8 - v304 = load.i64 v303 - v305 = iadd v304, v302 - v306 = uload8.i32 v305 - v307 = iconst.i32 255 - v308 = band.i32 v287, v307 - v309 = icmp ne v306, v308 - v310 = bint.i32 v309 - brnz v310, block33 - jump block137 - -block137: - v311 = icmp.i32 ne v286, v288 - v312 = bint.i32 v311 - brnz v312, block34 - jump block138 - -block138: - v313 = iadd.i32 v289, v288 - v314 = iconst.i32 1 - jump block36(v314, v313, v285, v288) - -block33: - v315 = icmp.i32 uge v306, v308 - v316 = bint.i32 v315 - brnz v316, block35 - jump block139 - -block139: - v317 = isub.i32 v297, v285 - v318 = iconst.i32 1 - jump block36(v318, v297, v285, v317) - -block34: - v319 = iconst.i32 1 - v320 = iadd.i32 v286, v319 - v321 = copy.i32 v289 - jump block36(v320, v321, v285, v288) - -block35: - v322 = iconst.i32 1 - v323 = iconst.i32 1 - v324 = iadd.i32 v289, v323 - v325 = iconst.i32 1 - v326 = copy.i32 v289 - jump block36(v322, v324, v326, v325) - -block36(v327: i32, v328: i32, v329: i32, v330: i32): - v331 = iadd v327, v328 - v332 = icmp uge v331, v291 - v333 = bint.i32 v332 - brnz v333, block38(v2, v330, v292, v329, v293, v294, v291, v295, v296) - jump block140 - -block140: - v334 = iadd.i32 v2, v331 - v335 = iconst.i32 0x4ffe - v336 = icmp uge v334, v335 - brz v336, block37 - jump block141 - -block141: - trap heap_oob - -block37: - v337 = uextend.i64 v334 - v338 = iadd_imm.i64 v0, -8 - v339 = load.i64 v338 - v340 = iadd v339, v337 - v341 = uload8.i32 v340 - v342 = copy.i32 v328 - jump block31(v329, v327, v341, v330, v342, v331, v291, v292, v293, v294, v295, v296) - -block38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i32, v350: i32, v351: i32): - v352 = iconst.i32 1 - v353 = iadd v346, v352 - v354 = iconst.i32 1 - v355 = iadd v347, v354 - v356 = icmp ugt v353, v355 - v357 = bint.i32 v356 - brnz v357, block39(v344) - jump block142 - -block142: - v358 = copy v345 - jump block39(v358) - -block39(v359: i32): - v360 = iadd.i32 v343, v359 - brnz.i32 v357, block40(v346) - jump block143 - -block143: - v361 = copy.i32 v347 - jump block40(v361) - -block40(v362: i32): - v363 = iconst.i32 1 - v364 = iadd v362, v363 - v365 = call fn1(v0, v343, v360, v364) - v366 = iconst.i32 0 - v367 = icmp eq v365, v366 - v368 = bint.i32 v367 - brnz v368, block63 - jump block144 - -block144: - v369 = iconst.i32 1 - v370 = 
iadd v362, v369 - v371 = isub.i32 v348, v370 - v372 = iconst.i32 1 - v373 = iadd v371, v372 - v374 = icmp ugt v362, v373 - v375 = bint.i32 v374 - v376 = copy v362 - brnz v375, block41(v376) - jump block145 - -block145: - v377 = copy v373 - jump block41(v377) - -block41(v378: i32): - v379 = iconst.i32 1 - v380 = iadd v378, v379 - v381 = iconst.i32 0 - jump block64(v380, v381) - -block42: - v382 = iconst.i32 8 - v383 = ishl.i32 v29, v382 - v384 = bor v383, v44 - v385 = iconst.i32 0x4ffe - v386 = icmp.i32 uge v33, v385 - brz v386, block43 - jump block146 - -block146: - trap heap_oob - -block43: - v387 = uextend.i64 v33 - v388 = iadd_imm.i64 v0, -8 - v389 = load.i64 v388 - v390 = iadd v389, v387 - v391 = uload8.i32 v390 - jump block44(v391, v54, v33) - -block44(v392: i32, v393: i32, v394: i32): - v395 = iconst.i32 8 - v396 = ishl v392, v395 - v397 = iconst.i32 0xff00 - v398 = band v396, v397 - v399 = iconst.i32 255 - v400 = band v393, v399 - v401 = bor v398, v400 - v402 = icmp eq v401, v384 - v403 = bint.i32 v402 - brnz v403, block56(v394, v14) - jump block147 - -block147: - v404 = iconst.i32 2 - v405 = iadd v394, v404 - v406 = iconst.i32 1 - v407 = iadd v394, v406 - v408 = iconst.i32 0x4ffe - v409 = icmp uge v405, v408 - brz v409, block45 - jump block148 - -block148: - trap heap_oob - -block45: - v410 = uextend.i64 v405 - v411 = iadd_imm.i64 v0, -8 - v412 = load.i64 v411 - v413 = iadd v412, v410 - v414 = uload8.i32 v413 - brnz v414, block44(v401, v414, v407) - jump block90(v14, v34) - -block46: - v415 = iconst.i32 8 - v416 = ishl.i32 v74, v415 - v417 = iconst.i32 16 - v418 = ishl.i32 v54, v417 - v419 = bor v416, v418 - v420 = iconst.i32 0x4ffe - v421 = icmp.i32 uge v33, v420 - brz v421, block47 - jump block149 - -block149: - trap heap_oob - -block47: - v422 = uextend.i64 v33 - v423 = iadd_imm.i64 v0, -8 - v424 = load.i64 v423 - v425 = iadd v424, v422 - v426 = uload8.i32 v425 - v427 = iconst.i32 24 - v428 = ishl v426, v427 - v429 = bor.i32 v419, v428 - v430 = iconst.i32 16 - v431 = ishl.i32 v44, v430 - v432 = iconst.i32 24 - v433 = ishl.i32 v29, v432 - v434 = bor v431, v433 - v435 = iconst.i32 8 - v436 = ishl.i32 v64, v435 - v437 = bor v434, v436 - v438 = icmp eq v429, v437 - v439 = bint.i32 v438 - brnz v439, block56(v33, v14) - jump block48(v33, v429) - -block48(v440: i32, v441: i32): - v442 = iconst.i32 1 - v443 = iadd v440, v442 - v444 = iconst.i32 3 - v445 = iadd v440, v444 - v446 = iconst.i32 0x4ffe - v447 = icmp uge v445, v446 - brz v447, block49 - jump block150 - -block150: - trap heap_oob - -block49: - v448 = uextend.i64 v445 - v449 = iadd_imm.i64 v0, -8 - v450 = load.i64 v449 - v451 = iadd v450, v448 - v452 = uload8.i32 v451 - v453 = iconst.i32 0 - v454 = icmp eq v452, v453 - v455 = bint.i32 v454 - brnz v455, block51(v14) - jump block151 - -block151: - v456 = bor.i32 v441, v452 - v457 = iconst.i32 8 - v458 = ishl v456, v457 - v459 = icmp ne v458, v437 - v460 = bint.i32 v459 - v461 = copy.i32 v443 - brnz v460, block48(v461, v458) - jump block50 - -block50: - jump block51(v14) - -block51(v462: i32): - v463 = iconst.i32 0 - v464 = iconst.i32 1056 - v465 = iadd v462, v464 - v466 = iconst.i32 0x4ffe - v467 = icmp uge v463, v466 - brz v467, block52 - jump block152 - -block152: - trap heap_oob - -block52: - v468 = uextend.i64 v463 - v469 = iadd_imm.i64 v0, -8 - v470 = load.i64 v469 - v471 = iadd v470, v468 - store.i32 v465, v471+4 - v472 = iconst.i32 0 - brnz.i32 v452, block53(v443) - jump block153 - -block153: - v473 = copy v472 - jump block53(v473) - -block53(v474: i32): - return v474 - 
-block54: - v475 = iconst.i32 8 - v476 = ishl.i32 v74, v475 - v477 = iconst.i32 16 - v478 = ishl.i32 v54, v477 - v479 = bor v476, v478 - v480 = bor v479, v94 - v481 = iconst.i32 0x4ffe - v482 = icmp.i32 uge v33, v481 - brz v482, block55 - jump block154 - -block154: - trap heap_oob - -block55: - v483 = uextend.i64 v33 - v484 = iadd_imm.i64 v0, -8 - v485 = load.i64 v484 - v486 = iadd v485, v483 - v487 = uload8.i32 v486 - v488 = iconst.i32 24 - v489 = ishl v487, v488 - v490 = bor.i32 v480, v489 - v491 = iconst.i32 16 - v492 = ishl.i32 v44, v491 - v493 = iconst.i32 24 - v494 = ishl.i32 v29, v493 - v495 = bor v492, v494 - v496 = iconst.i32 8 - v497 = ishl.i32 v64, v496 - v498 = bor v495, v497 - v499 = bor v498, v84 - v500 = icmp ne v490, v499 - v501 = bint.i32 v500 - brnz v501, block57 - jump block56(v33, v14) - -block56(v502: i32, v503: i32): - v504 = copy v502 - jump block90(v503, v504) - -block57: - jump block58(v33, v490) - -block58(v505: i32, v506: i32): - v507 = iconst.i32 4 - v508 = iadd v505, v507 - v509 = iconst.i32 1 - v510 = iadd v505, v509 - v511 = iconst.i32 0x4ffe - v512 = icmp uge v508, v511 - brz v512, block59 - jump block155 - -block155: - trap heap_oob - -block59: - v513 = uextend.i64 v508 - v514 = iadd_imm.i64 v0, -8 - v515 = load.i64 v514 - v516 = iadd v515, v513 - v517 = uload8.i32 v516 - v518 = iconst.i32 0 - v519 = icmp eq v517, v518 - v520 = bint.i32 v519 - brnz v520, block61(v14) - jump block156 - -block156: - v521 = iconst.i32 8 - v522 = ishl.i32 v506, v521 - v523 = bor v522, v517 - v524 = icmp ne v523, v499 - v525 = bint.i32 v524 - brnz v525, block58(v510, v523) - jump block60 - -block60: - jump block61(v14) - -block61(v526: i32): - v527 = iconst.i32 0 - brnz.i32 v517, block62(v510) - jump block157 - -block157: - v528 = copy v527 - jump block62(v528) - -block62(v529: i32): - v530 = copy v529 - jump block90(v526, v530) - -block63: - v531 = isub.i32 v348, v359 - v532 = iconst.i32 1 - v533 = iadd v531, v532 - jump block64(v359, v533) - -block64(v534: i32, v535: i32): - v536 = iconst.i32 1 - v537 = iadd.i32 v343, v536 - v538 = iconst.i32 0 - v539 = isub v538, v362 - v540 = iconst.i32 63 - v541 = bor.i32 v349, v540 - v542 = isub.i32 v348, v534 - v543 = iconst.i32 1 - v544 = iadd v542, v543 - v545 = iconst.i32 0 - v546 = copy.i32 v350 - jump block65(v350, v546, v349, v541, v348, v351, v544, v534, v545, v535, v343, v364, v537, v539, v362) - -block65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i32, v554: i32, v555: i32, v556: i32, v557: i32, v558: i32, v559: i32, v560: i32, v561: i32): - v562 = copy v556 - v563 = isub v547, v548 - v564 = icmp uge v563, v549 - v565 = bint.i32 v564 - brnz v565, block67(v547) - jump block158 - -block158: - v566 = iconst.i32 0 - v567 = call fn2(v0, v547, v566, v550) - brnz v567, block66 - jump block159 - -block159: - v568 = iadd v547, v550 - jump block67(v568) - -block66: - v569 = isub.i32 v567, v548 - v570 = icmp ult v569, v549 - v571 = bint.i32 v570 - brnz v571, block89(v552) - jump block160 - -block160: - v572 = copy.i32 v567 - jump block67(v572) - -block67(v573: i32): - v574 = iconst.i32 1 - v575 = iadd.i32 v548, v551 - v576 = iconst.i32 0x4ffe - v577 = icmp uge v575, v576 - brz v577, block68 - jump block161 - -block161: - trap heap_oob - -block68: - v578 = uextend.i64 v575 - v579 = iadd_imm.i64 v0, -8 - v580 = load.i64 v579 - v581 = iadd v580, v578 - v582 = uload8.i32 v581 - v583 = iconst.i32 31 - v584 = band v582, v583 - v585 = ishl.i32 v574, v584 - v586 = iconst.i32 1024 - v587 = iadd.i32 v552, v586 - v588 = 
iconst.i32 3 - v589 = ushr v582, v588 - v590 = iconst.i32 28 - v591 = band v589, v590 - v592 = iadd v587, v591 - v593 = iconst.i32 0x4ffe - v594 = icmp uge v592, v593 - brz v594, block69 - jump block162 - -block162: - trap heap_oob - -block69: - v595 = uextend.i64 v592 - v596 = iadd_imm.i64 v0, -8 - v597 = load.i64 v596 - v598 = iadd v597, v595 - v599 = load.i32 v598 - v600 = band.i32 v585, v599 - v601 = iconst.i32 0 - v602 = icmp eq v600, v601 - v603 = bint.i32 v602 - brnz v603, block74 - jump block163 - -block163: - v604 = iconst.i32 2 - v605 = ishl.i32 v582, v604 - v606 = iadd.i32 v552, v605 - v607 = iconst.i32 0x4ffe - v608 = icmp uge v606, v607 - brz v608, block70 - jump block164 - -block164: - trap heap_oob - -block70: - v609 = uextend.i64 v606 - v610 = iadd_imm.i64 v0, -8 - v611 = load.i64 v610 - v612 = iadd v611, v609 - v613 = load.i32 v612 - v614 = isub.i32 v551, v613 - v615 = iconst.i32 -1 - v616 = icmp eq v614, v615 - v617 = bint.i32 v616 - brnz v617, block75 - jump block165 - -block165: - v618 = iconst.i32 1 - v619 = iadd v614, v618 - v620 = icmp ult v619, v554 - v621 = bint.i32 v620 - v622 = copy.i32 v553 - brnz v621, block71(v622) - jump block166 - -block166: - v623 = copy v619 - jump block71(v623) - -block71(v624: i32): - v625 = copy v624 - brnz.i32 v555, block72(v625) - jump block72(v619) - -block72(v626: i32): - brnz.i32 v562, block73(v626) - jump block73(v619) - -block73(v627: i32): - v628 = copy.i32 v554 - v629 = copy.i32 v562 - jump block87(v548, v627, v573, v549, v550, v551, v552, v553, v628, v629, v557, v558, v559, v560, v561) - -block74: - v630 = copy.i32 v549 - v631 = copy.i32 v554 - v632 = copy.i32 v562 - jump block87(v548, v630, v573, v549, v550, v551, v552, v553, v631, v632, v557, v558, v559, v560, v561) - -block75: - v633 = icmp.i32 ugt v558, v555 - v634 = bint.i32 v633 - v635 = copy.i32 v558 - brnz v634, block76(v635) - jump block167 - -block167: - v636 = copy.i32 v555 - jump block76(v636) - -block76(v637: i32): - v638 = iadd.i32 v557, v637 - v639 = iconst.i32 0x4ffe - v640 = icmp uge v638, v639 - brz v640, block77 - jump block168 - -block168: - trap heap_oob - -block77: - v641 = uextend.i64 v638 - v642 = iadd_imm.i64 v0, -8 - v643 = load.i64 v642 - v644 = iadd v643, v641 - v645 = uload8.i32 v644 - v646 = iconst.i32 0 - v647 = icmp eq v645, v646 - v648 = bint.i32 v647 - brnz v648, block82(v548, v549, v551, v552) - jump block169 - -block169: - v649 = iadd.i32 v548, v637 - v650 = iadd.i32 v559, v637 - v651 = iadd.i32 v560, v637 - jump block78(v645, v649, v651, v650) - -block78(v652: i32, v653: i32, v654: i32, v655: i32): - v656 = iconst.i32 255 - v657 = band v652, v656 - v658 = iconst.i32 0x4ffe - v659 = icmp uge v653, v658 - brz v659, block79 - jump block170 - -block170: - trap heap_oob - -block79: - v660 = uextend.i64 v653 - v661 = iadd_imm.i64 v0, -8 - v662 = load.i64 v661 - v663 = iadd v662, v660 - v664 = uload8.i32 v663 - v665 = icmp.i32 ne v657, v664 - v666 = bint.i32 v665 - v667 = copy.i32 v554 - v668 = copy.i32 v562 - brnz v666, block87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561) - jump block171 - -block171: - v669 = iconst.i32 1 - v670 = iadd.i32 v653, v669 - v671 = iconst.i32 1 - v672 = iadd.i32 v654, v671 - v673 = iconst.i32 0x4ffe - v674 = icmp.i32 uge v655, v673 - brz v674, block80 - jump block172 - -block172: - trap heap_oob - -block80: - v675 = uextend.i64 v655 - v676 = iadd_imm.i64 v0, -8 - v677 = load.i64 v676 - v678 = iadd v677, v675 - v679 = uload8.i32 v678 - v680 = iconst.i32 1 - v681 = iadd.i32 
v655, v680 - brnz v679, block78(v679, v670, v672, v681) - jump block81 - -block81: - jump block82(v548, v549, v551, v552) - -block82(v682: i32, v683: i32, v684: i32, v685: i32): - v686 = icmp.i32 ule v558, v555 - v687 = bint.i32 v686 - brnz v687, block90(v685, v682) - jump block173 - -block173: - v688 = copy.i32 v561 - jump block83(v688) - -block83(v689: i32): - v690 = iadd.i32 v557, v689 - v691 = iconst.i32 0x4ffe - v692 = icmp uge v690, v691 - brz v692, block84 - jump block174 - -block174: - trap heap_oob - -block84: - v693 = uextend.i64 v690 - v694 = iadd_imm.i64 v0, -8 - v695 = load.i64 v694 - v696 = iadd v695, v693 - v697 = uload8.i32 v696 - v698 = iadd.i32 v682, v689 - v699 = iconst.i32 0x4ffe - v700 = icmp uge v698, v699 - brz v700, block85 - jump block175 - -block175: - trap heap_oob - -block85: - v701 = uextend.i64 v698 - v702 = iadd_imm.i64 v0, -8 - v703 = load.i64 v702 - v704 = iadd v703, v701 - v705 = uload8.i32 v704 - v706 = icmp.i32 ne v697, v705 - v707 = bint.i32 v706 - brnz v707, block86 - jump block176 - -block176: - v708 = icmp.i32 ule v689, v555 - v709 = bint.i32 v708 - v710 = iconst.i32 -1 - v711 = iadd.i32 v689, v710 - v712 = iconst.i32 0 - v713 = icmp eq v709, v712 - v714 = bint.i32 v713 - brnz v714, block83(v711) - jump block90(v685, v682) - -block86: - v715 = copy.i32 v554 - v716 = copy.i32 v562 - jump block88(v682, v554, v573, v683, v550, v684, v685, v553, v715, v562, v716, v557, v558, v559, v560, v561) - -block87(v717: i32, v718: i32, v719: i32, v720: i32, v721: i32, v722: i32, v723: i32, v724: i32, v725: i32, v726: i32, v727: i32, v728: i32, v729: i32, v730: i32, v731: i32): - v732 = copy v718 - v733 = iconst.i32 0 - jump block88(v717, v732, v719, v720, v721, v722, v723, v724, v725, v733, v726, v727, v728, v729, v730, v731) - -block88(v734: i32, v735: i32, v736: i32, v737: i32, v738: i32, v739: i32, v740: i32, v741: i32, v742: i32, v743: i32, v744: i32, v745: i32, v746: i32, v747: i32, v748: i32, v749: i32): - v750 = iadd v734, v735 - v751 = copy v742 - v752 = copy v743 - v753 = copy v744 - jump block65(v736, v750, v737, v738, v739, v740, v741, v751, v752, v753, v745, v746, v747, v748, v749) - -block89(v754: i32): - v755 = iconst.i32 0 - jump block90(v754, v755) - -block90(v756: i32, v757: i32): - v758 = iconst.i32 0 - v759 = iconst.i32 1056 - v760 = iadd v756, v759 - v761 = iconst.i32 0x4ffe - v762 = icmp uge v758, v761 - brz v762, block91 - jump block177 - -block177: - trap heap_oob - -block91: - v763 = uextend.i64 v758 - v764 = iadd_imm.i64 v0, -8 - v765 = load.i64 v764 - v766 = iadd v765, v763 - store.i32 v760, v766+4 - jump block92(v757) - -block92(v767: i32): - return v767 -} - -; Same problem from musl.wasm. 
-function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v - fn0 = u0:517 sig0 - -block0(v0: f64, v1: i64): - v3 = iconst.i64 0 - v4 = iconst.i32 0 - v131 = iconst.i64 0 - v5 = bitcast.f64 v131 - v6 = iconst.i32 0 - v7 = iconst.i32 0 - v8 = iconst.i32 0 - v132 = uextend.i64 v8 - v133 = iadd_imm v1, 0 - v134 = load.i64 v133 - v9 = iadd v134, v132 - v10 = load.i32 v9+4 - v11 = iconst.i32 16 - v12 = isub v10, v11 - v135 = uextend.i64 v7 - v136 = iadd_imm v1, 0 - v137 = load.i64 v136 - v13 = iadd v137, v135 - store v12, v13+4 - v14 = bitcast.i64 v0 - v15 = iconst.i64 63 - v16 = ushr v14, v15 - v17 = ireduce.i32 v16 - v18 = iconst.i64 32 - v19 = ushr v14, v18 - v20 = ireduce.i32 v19 - v21 = iconst.i32 0x7fff_ffff - v22 = band v20, v21 - v23 = iconst.i32 0x4086_232b - v24 = icmp ult v22, v23 - v25 = bint.i32 v24 - brnz v25, block10 - jump block178 - -block178: - v26 = iconst.i64 0x7fff_ffff_ffff_ffff - v27 = band v14, v26 - v28 = iconst.i64 0x7ff0_0000_0000_0000 - v29 = icmp ule v27, v28 - v30 = bint.i32 v29 - brnz v30, block9 - jump block2(v12, v0) - -block10: - v31 = iconst.i32 0x3fd6_2e43 - v32 = icmp.i32 ult v22, v31 - v33 = bint.i32 v32 - brnz v33, block8 - jump block179 - -block179: - v34 = iconst.i32 0x3ff0_a2b2 - v35 = icmp.i32 uge v22, v34 - v36 = bint.i32 v35 - brnz v36, block6 - jump block180 - -block180: - v37 = iconst.i32 1 - v38 = bxor.i32 v17, v37 - v39 = isub v38, v17 - jump block5(v0, v39) - -block9: - v138 = iconst.i64 0x4086_2e42_fefa_39ef - v40 = bitcast.f64 v138 - v41 = fcmp ge v40, v0 - v42 = bint.i32 v41 - v139 = fcmp.f64 uno v0, v0 - v140 = fcmp.f64 one v0, v0 - v43 = bor v139, v140 - v44 = bint.i32 v43 - v45 = bor v42, v44 - brnz v45, block7 - jump block181 - -block181: - v141 = iconst.i64 0x7fe0_0000_0000_0000 - v46 = bitcast.f64 v141 - v47 = fmul.f64 v0, v46 - jump block2(v12, v47) - -block8: - v48 = iconst.i32 0x3e30_0000 - v49 = icmp.i32 ule v22, v48 - v50 = bint.i32 v49 - brnz v50, block3 - jump block182 - -block182: - v51 = iconst.i32 0 - v142 = iconst.i64 0 - v52 = bitcast.f64 v142 - v178 = copy.f64 v0 - jump block4(v0, v178, v52, v51) - -block7: - v143 = iconst.i64 0xc086_232b_dd7a_bcd2 - v53 = bitcast.f64 v143 - v54 = fcmp.f64 ge v0, v53 - v55 = bint.i32 v54 - v56 = bor v55, v44 - brnz v56, block6 - jump block183 - -block183: - v144 = iconst.i64 0xb6a0_0000_0000_0000 - v57 = bitcast.f64 v144 - v58 = fdiv v57, v0 - v59 = fdemote.f32 v58 - v145 = uextend.i64 v12 - v146 = iadd_imm.i64 v1, 0 - v147 = load.i64 v146 - v60 = iadd v147, v145 - store v59, v60+12 - v148 = iconst.i64 0 - v61 = bitcast.f64 v148 - v149 = iconst.i64 0xc087_4910_d52d_3051 - v62 = bitcast.f64 v149 - v63 = fcmp gt v62, v0 - v64 = bint.i32 v63 - brnz v64, block2(v12, v61) - jump block6 - -block6: - v150 = iconst.i64 0x3ff7_1547_652b_82fe - v66 = bitcast.f64 v150 - v67 = fmul.f64 v0, v66 - v69 = iconst.i32 3 - v70 = ishl.i32 v17, v69 - v71 = iconst.i32 5040 - v72 = iadd v70, v71 - v151 = uextend.i64 v72 - v152 = iadd_imm.i64 v1, 0 - v153 = load.i64 v152 - v73 = iadd v153, v151 - v74 = load.f64 v73 - v75 = fadd v67, v74 - v76 = x86_cvtt2si.i32 v75 - v158 = iconst.i32 0x8000_0000 - v154 = icmp ne v76, v158 - brnz v154, block11 - jump block184 - -block184: - v155 = fcmp uno v75, v75 - brz v155, block12 - jump block185 - -block185: - trap bad_toint - -block12: - v159 = iconst.i64 0xc1e0_0000_0020_0000 - v156 = bitcast.f64 
v159 - v157 = fcmp ge v156, v75 - brz v157, block13 - jump block186 - -block186: - trap int_ovf - -block13: - jump block11 - -block11: - jump block5(v0, v76) - -block5(v77: f64, v78: i32): - v79 = fcvt_from_sint.f64 v78 - v160 = iconst.i64 0xbfe6_2e42_fee0_0000 - v80 = bitcast.f64 v160 - v81 = fmul v79, v80 - v82 = fadd v77, v81 - v161 = iconst.i64 0x3dea_39ef_3579_3c76 - v83 = bitcast.f64 v161 - v84 = fmul v79, v83 - v85 = fsub v82, v84 - jump block4(v82, v85, v84, v78) - -block4(v86: f64, v87: f64, v108: f64, v113: i32): - v88 = fmul v87, v87 - v162 = iconst.i64 0x3e66_3769_72be_a4d0 - v89 = bitcast.f64 v162 - v90 = fmul v88, v89 - v163 = iconst.i64 0xbeeb_bd41_c5d2_6bf1 - v91 = bitcast.f64 v163 - v92 = fadd v90, v91 - v93 = fmul v88, v92 - v164 = iconst.i64 0x3f11_566a_af25_de2c - v94 = bitcast.f64 v164 - v95 = fadd v93, v94 - v96 = fmul v88, v95 - v165 = iconst.i64 0xbf66_c16c_16be_bd93 - v97 = bitcast.f64 v165 - v98 = fadd v96, v97 - v99 = fmul v88, v98 - v166 = iconst.i64 0x3fc5_5555_5555_553e - v100 = bitcast.f64 v166 - v101 = fadd v99, v100 - v102 = fmul v88, v101 - v103 = fsub v87, v102 - v104 = fmul v87, v103 - v167 = iconst.i64 0x4000_0000_0000_0000 - v105 = bitcast.f64 v167 - v106 = fsub v105, v103 - v107 = fdiv v104, v106 - v109 = fsub v107, v108 - v110 = fadd v86, v109 - v168 = iconst.i64 0x3ff0_0000_0000_0000 - v111 = bitcast.f64 v168 - v112 = fadd v110, v111 - v169 = iconst.i32 0 - v114 = icmp eq v113, v169 - v115 = bint.i32 v114 - brnz v115, block2(v12, v112) - jump block187 - -block187: - v116 = call fn0(v112, v113, v1) - jump block2(v12, v116) - -block3: - v170 = iconst.i64 0x7fe0_0000_0000_0000 - v117 = bitcast.f64 v170 - v118 = fadd.f64 v0, v117 - v171 = uextend.i64 v12 - v172 = iadd_imm.i64 v1, 0 - v173 = load.i64 v172 - v119 = iadd v173, v171 - store v118, v119 - v174 = iconst.i64 0x3ff0_0000_0000_0000 - v120 = bitcast.f64 v174 - v121 = fadd.f64 v0, v120 - jump block2(v12, v121) - -block2(v123: i32, v130: f64): - v122 = iconst.i32 0 - v127 = iconst.i32 16 - v128 = iadd v123, v127 - v175 = uextend.i64 v122 - v176 = iadd_imm.i64 v1, 0 - v177 = load.i64 v176 - v129 = iadd v177, v175 - store v128, v129+4 - jump block1(v130) - -block1(v2: f64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif deleted file mode 100644 index 4c9b27d6b0..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-216.clif +++ /dev/null @@ -1,87 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer. -; -; The (old) coalescer creates a virtual register with two identical values. 
-function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v3 = iconst.i64 0 - v5 = iconst.i32 0 - brz v5, block3(v3) - jump block4(v3, v3) - -block4(v11: i64, v29: i64): - v6 = iconst.i32 0 - brz v6, block14 - jump block15 - -block15: - v9 = iconst.i32 -17 - v12 = iconst.i32 0xffff_ffff_ffff_8000 - jump block9(v12) - -block9(v10: i32): - brnz v10, block8(v9, v11, v11) - jump block16 - -block16: - brz.i32 v9, block13 - jump block17 - -block17: - v13 = iconst.i32 0 - brnz v13, block6(v11, v11) - jump block18 - -block18: - v14 = iconst.i32 0 - brz v14, block12 - jump block11 - -block12: - jump block4(v11, v11) - -block11: - jump block10(v11) - -block13: - v15 = iconst.i64 1 - jump block10(v15) - -block10(v21: i64): - v16 = iconst.i32 0 - brnz v16, block6(v21, v11) - jump block19 - -block19: - v17 = iconst.i32 0xffff_ffff_ffff_9f35 - jump block8(v17, v21, v11) - -block8(v8: i32, v23: i64, v28: i64): - jump block7(v8, v23, v28) - -block14: - v18 = iconst.i32 0 - jump block7(v18, v11, v29) - -block7(v7: i32, v22: i64, v27: i64): - jump block6(v22, v27) - -block6(v20: i64, v25: i64): - v19 = iconst.i32 0xffc7 - brnz v19, block4(v20, v25) - jump block5 - -block5: - jump block3(v25) - -block3(v24: i64): - jump block2(v24) - -block2(v4: i64): - jump block1(v4) - -block1(v2: i64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif deleted file mode 100644 index d47a905637..0000000000 --- a/cranelift/filetests/filetests/regalloc/coloring-227.clif +++ /dev/null @@ -1,115 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - - block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): -[RexOp1pu_id#b8] v5 = iconst.i32 0 -[RexOp1pu_id#b8] v6 = iconst.i32 0 -[RexOp1tjccb#74] brz v6, block10 -[Op1jmpb#eb] jump block3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) - - block3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32): -[Op1jmpb#eb] jump block6 - - block6: -[RexOp1pu_id#b8] v8 = iconst.i32 0 -[RexOp1tjccb#75] brnz v8, block5 -[Op1jmpb#eb] jump block20 - - block20: -[RexOp1pu_id#b8] v9 = iconst.i32 0 -[RexOp1pu_id#b8] v11 = iconst.i32 0 -[DynRexOp1icscc#39] v12 = icmp.i32 eq v15, v11 -[RexOp2urm_noflags#4b6] v13 = bint.i32 v12 -[DynRexOp1rr#21] v14 = band v9, v13 -[RexOp1tjccb#75] brnz v14, block6 -[Op1jmpb#eb] jump block7 - - block7: -[RexOp1tjccb#74] brz.i32 v17, block8 -[Op1jmpb#eb] jump block17 - - block17: -[RexOp1pu_id#b8] v18 = iconst.i32 0 -[RexOp1tjccb#74] brz v18, block9 -[Op1jmpb#eb] jump block16 - - block16: -[RexOp1pu_id#b8] v21 = iconst.i32 0 -[RexOp1umr#89] v79 = uextend.i64 v5 -[RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v81 = load.i64 v80 -[RexOp1rr#8001] v22 = iadd v81, v79 -[RexMp1st#189] istore16 v21, v22 -[Op1jmpb#eb] jump block9 - - block9: -[Op1jmpb#eb] jump block8 - - block8: -[RexOp1pu_id#b8] v27 = iconst.i32 3 -[RexOp1pu_id#b8] v28 = iconst.i32 4 -[DynRexOp1rr#09] v35 = bor.i32 v31, v13 -[RexOp1tjccb#75] brnz v35, block15(v27) -[Op1jmpb#eb] jump block15(v28) - - block15(v36: i32): -[Op1jmpb#eb] jump block3(v25, v36, v25, v31, v40, v47, v54, v61, v68, v75) - - block5: -[Op1jmpb#eb] jump block4 - - block4: -[Op1jmpb#eb] jump block2(v40, v47, v54, v61, v68, v75) - - block10: -[RexOp1pu_id#b8] v43 = iconst.i32 
0 -[Op1jmpb#eb] jump block2(v43, v5, v0, v1, v2, v3) - - block2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): -[RexOp1pu_id#b8] v44 = iconst.i32 0 -[RexOp1tjccb#74] brz v44, block12 -[Op1jmpb#eb] jump block18 - - block18: -[RexOp1pu_id#b8] v50 = iconst.i32 11 -[RexOp1tjccb#74] brz v50, block14 -[Op1jmpb#eb] jump block19 - - block19: -[RexOp1umr#89] v82 = uextend.i64 v52 -[RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v84 = load.i64 v83 -[RexOp1rr#8001] v57 = iadd v84, v82 -[RexOp1ld#8b] v58 = load.i32 v57 -[RexOp1umr#89] v85 = uextend.i64 v58 -[RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v87 = load.i64 v86 -[RexOp1rr#8001] v64 = iadd v87, v85 -[RexOp1st#88] istore8 v59, v64 -[RexOp1pu_id#b8] v65 = iconst.i32 0 -[Op1jmpb#eb] jump block13(v65) - - block14: -[Op1jmpb#eb] jump block13(v66) - - block13(v51: i32): -[RexOp1umr#89] v88 = uextend.i64 v45 -[RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v90 = load.i64 v89 -[RexOp1rr#8001] v71 = iadd v90, v88 -[RexOp1st#89] store v51, v71 -[Op1jmpb#eb] jump block12 - - block12: -[Op1jmpb#eb] jump block11 - - block11: -[Op1jmpb#eb] jump block1 - - block1: -[Op1ret#c3] return -} diff --git a/cranelift/filetests/filetests/regalloc/constraints.clif b/cranelift/filetests/filetests/regalloc/constraints.clif deleted file mode 100644 index 60cd731ed8..0000000000 --- a/cranelift/filetests/filetests/regalloc/constraints.clif +++ /dev/null @@ -1,82 +0,0 @@ -test regalloc -target i686 - -; regex: V=v\d+ -; regex: REG=%r([abcd]x|[sd]i) - -; Tied operands, both are killed at instruction. -function %tied_easy() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; not: copy - ; check: isub - v2 = isub v0, v1 - return v2 -} - -; Tied operand is live after instruction. -function %tied_alive() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; check: $(v0c=$V) = copy v0 - ; check: v2 = isub $v0c, v1 - v2 = isub v0, v1 - ; check: v3 = iadd v2, v0 - v3 = iadd v2, v0 - return v3 -} - -; Fixed register constraint. -function %fixed_op() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - return v2 -} - -; Fixed register constraint twice. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - ; check: regmove v0, %rcx -> $REG - ; check: regmove v2, $REG -> %rcx - v3 = ishl v0, v2 - - return v3 -} - -; Tied use of a diverted register. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - ; check: v2 = ishl v1, v0 - v2 = ishl v1, v0 - - ; Now v0 is globally allocated to %rax, but diverted to %rcx. - ; Check that the tied def gets the diverted register. - v3 = isub v0, v2 - ; not: regmove - ; check: ,%rcx] - ; sameln: isub - ; Move it into place for the return value. 
- ; check: regmove v3, %rcx -> %rax - return v3 -} diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif deleted file mode 100644 index 90650aa4f0..0000000000 --- a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test that fallthrough returns are visited by reload and coloring. - -function %foo() -> f64 { - fn0 = %bar() - -block0: - v0 = f64const 0.0 - call fn0() - fallthrough_return v0 -} -; check: fill v0 - -function %foo() -> f64 { - fn0 = %bar() -> f64, f64 - -block0: - v0, v1 = call fn0() - fallthrough_return v1 -} -; check: regmove v1, %xmm1 -> %xmm0 diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif deleted file mode 100644 index 1d569727dd..0000000000 --- a/cranelift/filetests/filetests/regalloc/ghost-param.clif +++ /dev/null @@ -1,45 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; This test case would create a block parameter that was a ghost value. -; The coalescer would insert a copy of the ghost value, leading to verifier errors. -; -; We don't allow block parameters to be ghost values any longer. -; -; Test case by binaryen fuzzer! - -function %pr215(i64 vmctx [%rdi]) system_v { -block0(v0: i64): - v10 = iconst.i64 0 - v1 = bitcast.f64 v10 - jump block5(v1) - -block5(v9: f64): - v11 = iconst.i64 0xffff_ffff_ff9a_421a - v4 = bitcast.f64 v11 - v6 = iconst.i32 0 - v7 = iconst.i32 1 - brnz v7, block4(v6) - jump block8 - -block8: - v8 = iconst.i32 0 - jump block7(v8) - -block7(v5: i32): - brnz v5, block3(v4) - jump block5(v4) - -block4(v3: i32): - brnz v3, block2 - jump block3(v9) - -block3(v2: f64): - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif deleted file mode 100644 index 1fe89ae823..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-constraints.clif +++ /dev/null @@ -1,30 +0,0 @@ -test regalloc -target i686 legacy - -; This test covers the troubles when values with global live ranges are defined -; by instructions with constrained register classes. -; -; The icmp_imm instrutions write their b1 result to the ABCD register class on -; 32-bit x86. So if we define 5 live values, they can't all fit. -function %global_constraints(i32) { -block0(v0: i32): - v1 = icmp_imm eq v0, 1 - v2 = icmp_imm ugt v0, 2 - v3 = icmp_imm sle v0, 3 - v4 = icmp_imm ne v0, 4 - v5 = icmp_imm sge v0, 5 - brnz v5, block1 - jump block2 - -block2: - return - -block1: - ; Make sure v1-v5 are live in. 
- v10 = band v1, v2 - v11 = bor v3, v4 - v12 = bor v10, v11 - v13 = bor v12, v5 - trapnz v13, user0 - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif deleted file mode 100644 index 6d31f7511a..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-fixed.clif +++ /dev/null @@ -1,16 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %foo() system_v { -block4: - v3 = iconst.i32 0 - jump block3 - -block3: - v9 = udiv v3, v3 - jump block1 - -block1: - v19 = iadd.i32 v9, v9 - jump block3 -} diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif deleted file mode 100644 index c4534b0f8b..0000000000 --- a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif +++ /dev/null @@ -1,44 +0,0 @@ -test regalloc -target x86_64 legacy - -function u0:587() fast { -block0: - v97 = iconst.i32 0 - v169 = iconst.i32 0 - v1729 = iconst.i32 0 - jump block100(v97, v97, v97, v97, v97) - -block100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32): - v1762 = iconst.i32 0 - v1769 = iconst.i32 0 - v1774 = iconst.i32 0 - v1864 = iconst.i32 0 - v1897 = iconst.i32 0 - jump block102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169) - -block102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32): - v1929 = iconst.i32 0 - v1943 = iconst.i32 0 - v1949 = iconst.i32 0 - jump block123(v1897, v1769) - -block123(v1950: i32, v1979: i32): - v1955 = iconst.i32 0 - brz v1955, block125 - jump block122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901) - -block125: - v1961 = iadd_imm.i32 v1949, 0 - v1952 = iconst.i32 0 - v1962 = iconst.i64 0 - v1963 = load.i32 v1962 - brz v1963, block123(v1952, v1961) - jump block127 - -block127: - v1966 = iconst.i32 0 - jump block122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966) - -block122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32): - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/infinite-interference.clif b/cranelift/filetests/filetests/regalloc/infinite-interference.clif deleted file mode 100644 index b7a7736405..0000000000 --- a/cranelift/filetests/filetests/regalloc/infinite-interference.clif +++ /dev/null @@ -1,37 +0,0 @@ -test regalloc -target riscv32 - -; Here, the coalescer initially builds vreg0 = [v1, v2, v3] -; -; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to -; resolve that conflict since v1 will just interfere with the inserted copy too. - -;function %c1(i32) -> i32 { -;block0(v0: i32): -; v1 = iadd_imm v0, 1 -; v2 = iconst.i32 1 -; brz v1, block1(v2) -; jump block2 -; -;block1(v3: i32): -; return v3 -; -;block2: -; jump block1(v1) -;} - -; Same thing with v1 and v2 swapped to reverse the order of definitions. 
- -function %c2(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 1 - v2 = iconst.i32 1 - brz v2, block1(v1) - jump block2 - -block1(v3: i32): - return v3 - -block2: - jump block1(v2) -} diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif deleted file mode 100644 index 3272199bca..0000000000 --- a/cranelift/filetests/filetests/regalloc/iterate.clif +++ /dev/null @@ -1,164 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { -block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): - v32 = iconst.i32 0 - v6 = bitcast.f32 v32 - v7 = iconst.i64 0 - v33 = iconst.i64 0 - v8 = bitcast.f64 v33 - v34 = iconst.i32 0xbe99_999a - v9 = bitcast.f32 v34 - v10 = iconst.i32 40 - v11 = iconst.i32 -7 - v35 = iconst.i32 0x40b0_0000 - v12 = bitcast.f32 v35 - v13 = iconst.i64 6 - v36 = iconst.i64 0x4020_0000_0000_0000 - v14 = bitcast.f64 v36 - v44 = iconst.i64 0 - v37 = icmp slt v0, v44 - brnz v37, block2 - jump block11 - -block11: - v38 = fcvt_from_sint.f64 v0 - jump block3(v38) - -block2: - v45 = iconst.i32 1 - v39 = ushr.i64 v0, v45 - v40 = band_imm.i64 v0, 1 - v41 = bor v39, v40 - v42 = fcvt_from_sint.f64 v41 - v43 = fadd v42, v42 - jump block3(v43) - -block3(v15: f64): - v16 = fpromote.f64 v9 - v46 = uextend.i64 v10 - v17 = fcvt_from_sint.f64 v46 - v18 = fcvt_from_sint.f64 v11 - v19 = fpromote.f64 v12 - v54 = iconst.i64 0 - v47 = icmp.i64 slt v13, v54 - brnz v47, block4 - jump block12 - -block12: - v48 = fcvt_from_sint.f64 v13 - jump block5(v48) - -block4: - v55 = iconst.i32 1 - v49 = ushr.i64 v13, v55 - v50 = band_imm.i64 v13, 1 - v51 = bor v49, v50 - v52 = fcvt_from_sint.f64 v51 - v53 = fadd v52, v52 - jump block5(v53) - -block5(v20: f64): - v63 = iconst.i64 0 - v56 = icmp.i64 slt v7, v63 - brnz v56, block6 - jump block13 - -block13: - v57 = fcvt_from_sint.f64 v7 - jump block7(v57) - -block6: - v64 = iconst.i32 1 - v58 = ushr.i64 v7, v64 - v59 = band_imm.i64 v7, 1 - v60 = bor v58, v59 - v61 = fcvt_from_sint.f64 v60 - v62 = fadd v61, v61 - jump block7(v62) - -block7(v21: f64): - v22 = fadd v21, v14 - v23 = fadd.f64 v20, v22 - v24 = fadd.f64 v19, v23 - v25 = fadd.f64 v18, v24 - v26 = fadd.f64 v17, v25 - v27 = fadd.f64 v2, v26 - v28 = fadd.f64 v16, v27 - v29 = fadd.f64 v15, v28 - v30 = x86_cvtt2si.i64 v29 - v69 = iconst.i64 0x8000_0000_0000_0000 - v65 = icmp ne v30, v69 - brnz v65, block8 - jump block15 - -block15: - v66 = fcmp uno v29, v29 - brz v66, block9 - jump block16 - -block16: - trap bad_toint - -block9: - v70 = iconst.i64 0xc3e0_0000_0000_0000 - v67 = bitcast.f64 v70 - v68 = fcmp gt v67, v29 - brz v68, block10 - jump block17 - -block17: - trap int_ovf - -block10: - jump block8 - -block8: - jump block1(v30) - -block1(v31: i64): - return v31 -} - -function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, 48 - sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v - -block0(v0: i64): - v1 = iconst.i32 32 - v2 = iconst.i64 64 - v3 = iconst.i32 9 - v4 = iconst.i64 1063 - v5 = iadd_imm v0, 48 - v6 = load.i32 v5 - v7 = icmp uge v3, v6 - ; If we're unlucky, there are no ABCD registers available for v7 at this branch. 
- brz v7, block2 - jump block4 - -block4: - trap heap_oob - -block2: - v8 = load.i64 v5+8 - v9 = uextend.i64 v3 - v16 = iconst.i64 16 - v10 = imul v9, v16 - v11 = iadd v8, v10 - v12 = load.i64 v11 - brnz v12, block3 - jump block5 - -block5: - trap icall_null - -block3: - v13 = load.i64 v11+8 - v14 = call_indirect.i64 sig0, v12(v1, v2, v13, v4) - jump block1(v14) - -block1(v15: i64): - return v15 -} diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif deleted file mode 100644 index 0a6b160f09..0000000000 --- a/cranelift/filetests/filetests/regalloc/multi-constraints.clif +++ /dev/null @@ -1,51 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Test combinations of constraints. -; -; The x86 ushr instruction requires its second operand to be passed in %rcx and its output is -; tied to the first input operand. -; -; If we pass the same value to both operands, both constraints must be satisfied. - -; Found by the Binaryen fuzzer in PR221. -; -; Conditions triggering the problem: -; -; - The same value used for a tied operand and a fixed operand. -; - The common value is already in %rcx. -; - The tied output value is live outside the block. -; -; Under these conditions, Solver::add_tied_input() would create a variable for the tied input -; without considering the fixed constraint. -function %pr221(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - v4 = ushr v3, v3 - jump block1 - -block1: - return v4 -} - -; Found by the Binaryen fuzzer in PR218. -; -; This is a similar situation involving combined constraints on the ushr instruction: -; -; - The %rcx register is already in use by a globally live value. -; - The ushr x, x result is also a globally live value. -; -; Since the ushr x, x result is forced to be placed in %rcx, we must set the replace_global_defines -; flag so it can be reassigned to a different global register. -function %pr218(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - ; check: regmove v3, %rcx -> - v4 = ushr v0, v0 - ; check: v4 = copy - jump block1 - -block1: - ; v3 is globally live in %rcx. - ; v4 is also globally live. Needs to be assigned something else for the trip across the CFG edge. - v5 = iadd v3, v4 - return v5 -} diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif deleted file mode 100644 index 8825a4df72..0000000000 --- a/cranelift/filetests/filetests/regalloc/multiple-returns.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Return the same value twice. This needs a copy so that each value can be -; allocated its own register. -function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: return v2, v3 - -; Same thing, now with a fallthrough_return. 
-function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - fallthrough_return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: fallthrough_return v2, v3 diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif deleted file mode 100644 index 1ba797f6c8..0000000000 --- a/cranelift/filetests/filetests/regalloc/output-interference.clif +++ /dev/null @@ -1,14 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %test(i64) -> i64 system_v { -block0(v0: i64): - v2 = iconst.i64 12 - ; This division clobbers two of its fixed input registers on x86. - ; These are FixedTied constraints that the spiller needs to resolve. - v5 = udiv v0, v2 - v6 = iconst.i64 13 - v9 = udiv v0, v6 - v10 = iadd v5, v9 - return v10 -} diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif deleted file mode 100644 index 5e6a7e9864..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-208.clif +++ /dev/null @@ -1,112 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; regex: V=v\d+ -; regex: BB=block\d+ - -; Filed as https://github.com/bytecodealliance/cranelift/issues/208 -; -; The verifier complains about a branch argument that is not in the same virtual register as the -; corresponding block argument. -; -; The problem was the reload pass rewriting block arguments on "brnz v9, block3(v9)" - -function %pr208(i64 vmctx [%rdi]) system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v - sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v - fn0 = u0:1 sig0 - fn1 = u0:3 sig1 - -block0(v0: i64): - v1 = iconst.i32 0 - v2 = call fn0(v0) - v20 = iconst.i32 0x4ffe - v16 = icmp uge v2, v20 - brz v16, block5 - jump block9 - -block9: - trap heap_oob - -block5: - v17 = uextend.i64 v2 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v3 = iadd v19, v17 - v4 = load.i32 v3 - v21 = iconst.i32 0 - v5 = icmp eq v4, v21 - v6 = bint.i32 v5 - brnz v6, block2 - jump block3(v4) - - ; check: block5: - ; check: jump block3(v4) - ; check: $(splitEdge=$BB): - ; nextln: jump block3(v9) - -block3(v7: i32): - call fn1(v0, v7) - v26 = iconst.i32 0x4ffe - v22 = icmp uge v7, v26 - brz v22, block6 - jump block10 - -block10: - trap heap_oob - -block6: - v23 = uextend.i64 v7 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v8 = iadd v25, v23 - v9 = load.i32 v8+56 - ; check: v9 = spill - ; check: brnz $V, $splitEdge - brnz v9, block3(v9) - jump block4 - -block4: - jump block2 - -block2: - v10 = iconst.i32 0 - v31 = iconst.i32 0x4ffe - v27 = icmp uge v10, v31 - brz v27, block7 - jump block11 - -block11: - trap heap_oob - -block7: - v28 = uextend.i64 v10 - v29 = iadd_imm.i64 v0, -8 - v30 = load.i64 v29 - v11 = iadd v30, v28 - v12 = load.i32 v11+12 - call fn1(v0, v12) - v13 = iconst.i32 0 - v36 = iconst.i32 0x4ffe - v32 = icmp uge v13, v36 - brz v32, block8 - jump block12 - -block12: - trap heap_oob - -block8: - v33 = uextend.i64 v13 - v34 = iadd_imm.i64 v0, -8 - v35 = load.i64 v34 - v14 = iadd v35, v33 - v15 = load.i32 v14+12 - call fn1(v0, v15) - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif deleted file mode 100644 index 5dafe32b5c..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-779.clif +++ /dev/null 
@@ -1,23 +0,0 @@ -test compile -target x86_64 legacy - -; Filed as https://github.com/bytecodealliance/cranelift/issues/779 -; -; The copy_nop optimisation to reload (see Issue 773) was creating -; copy_nop instructions for types for which there were no encoding. - -function u0:0(i64, i64, i64) system_v { - sig0 = () system_v - sig1 = (i16) system_v - fn1 = u0:94 sig0 - fn2 = u0:95 sig1 - -block0(v0: i64, v1: i64, v2: i64): - v3 = iconst.i16 0 - jump block1(v3) - -block1(v4: i16): - call fn1() - call fn2(v4) - jump block1(v4) -} diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif deleted file mode 100644 index 1ae755a988..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload.clif +++ /dev/null @@ -1,46 +0,0 @@ -test regalloc -target riscv32 legacy enable_e - -; regex: V=v\d+ - -; Check that we can handle a function return value that got spilled. -function %spill_return() -> i32 { - fn0 = %foo() -> i32 system_v - -block0: - v0 = call fn0() - ; check: $(reg=$V) = call fn0 - ; check: v0 = spill $reg - v2 = call fn0() - ; check: v2 = call fn0 - return v0 - ; check: $(reload=$V) = fill v0 - ; check: return $reload -} - -; Check that copies where the arg has been spilled are replaced with fills. -; -; RV32E has 6 registers for function arguments so the 7th, v6, will be placed -; on the stack. -function %spilled_copy_arg(i32, i32, i32, i32, i32, i32, i32) -> i32 { - -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): - ; not: copy - ; check: v10 = fill v6 - v10 = copy v6 - return v10 -} - -; Check that copies where the result has been spilled are replaced with spills. -; -; v1 is live across a call so it will be spilled. -function %spilled_copy_result(i32) -> i32 { - fn0 = %foo(i32) - -block0(v0: i32): - ; not: copy - ; check: v1 = spill v0 - v1 = copy v0 - call fn0(v1) - return v1 -} diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif deleted file mode 100644 index 701a91a15a..0000000000 --- a/cranelift/filetests/filetests/regalloc/schedule-moves.clif +++ /dev/null @@ -1,39 +0,0 @@ -test regalloc -target i686 legacy haswell - -function %pr165() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v0 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - return -} - -; Same as above, but use so many registers that spilling is required. -; Note: This is also a candidate for using xchg instructions. 
-function %emergency_spill() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v3 = iconst.i32 0x3102_0304 - v4 = iconst.i32 0x4102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v3 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - istore8 v3, v4+0x2710 - return -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif deleted file mode 100644 index b280db086f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif +++ /dev/null @@ -1,100 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v236 = iconst.i32 0x4de9_bd37 - v424 = iconst.i32 0 - jump block37(v424) - -block37(v65: i32): - v433 = iconst.i32 0 - jump block40(v433) - -block40(v70: i32): - v75 = iconst.i32 0 - v259 = iconst.i32 0 - v78 -> v259 - v449 = iconst.i32 0 - v450, v451 = x86_sdivmodx v75, v449, v259 - v79 -> v450 - v269 = iconst.i32 0 - v270 = ushr_imm v269, 31 - v271 = iadd v269, v270 - v98 -> v271 - v100 = iconst.i32 -31 - v272 = iconst.i32 0x4de9_bd37 - v490, v273 = x86_smulx v100, v272 - v493 = iconst.i32 0 - jump block61(v493) - -block61(v103: i32): - v104 = iconst.i32 -23 - v105 = iconst.i32 -23 - v106 = popcnt v105 - v500 = sshr_imm v104, 31 - v501 = iconst.i32 0 - jump block64(v501) - -block64(v107: i32): - v108 = iconst.i32 0 - v109 = iconst.i32 0 - v278 = iconst.i32 0 - v507, v279 = x86_smulx v109, v278 - v280 = isub v279, v109 - v281 = sshr_imm v280, 11 - v282 = iconst.i32 0 - v283 = iadd v281, v282 - v111 -> v283 - v112 = rotr v108, v283 - jump block65 - -block65: - v509 = iconst.i32 0 - v510, v511 = x86_sdivmodx v107, v509, v112 - v113 -> v510 - v114 = iconst.i32 0 - v517 = iconst.i32 0 - v518, v519 = x86_sdivmodx v103, v517, v114 - v115 -> v518 - v534 = iconst.i32 0 - v122 -> v534 - v541 = iconst.i32 0 - v542, v543 = x86_sdivmodx v271, v541, v122 - v123 -> v542 - v289 = iconst.i32 0 - v125 -> v289 - v550 = iconst.i32 0 - v551, v552 = x86_sdivmodx v79, v550, v289 - v126 -> v551 - v130 = iconst.i32 0 - v558 = iconst.i32 0 - v559, v560 = x86_sdivmodx v70, v558, v130 - v131 -> v559 - v305 = iconst.i32 0 - v140 -> v305 - v577 = iconst.i32 0 - v578, v579 = x86_sdivmodx v65, v577, v305 - v141 -> v578 - v166 = iconst.i32 0 - v167 = iconst.i32 -31 - v318 = iconst.i32 0x4de9_bd37 - v650, v319 = x86_smulx v167, v318 - v320 = isub v319, v167 - v321 = sshr_imm v320, 4 - v322 = iconst.i32 0 - v323 = iadd v321, v322 - v169 -> v323 - v652 = iconst.i32 0 - v653, v654 = x86_sdivmodx v166, v652, v323 - v170 -> v653 - v171 = iconst.i32 -23 - v172 = iconst.i32 -23 - v173 = popcnt v172 - v174 = popcnt v173 - v660 = sshr_imm v171, 31 - v661, v662 = x86_sdivmodx v171, v660, v174 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif deleted file mode 100644 index 1c2d1b2bc0..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif +++ /dev/null @@ -1,137 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: 
i64): - v5 = iconst.i32 -8 - v114 = iconst.i32 0 - v16 = iconst.i32 -8 - v17 = popcnt v16 - v192 = ifcmp_imm v17, -1 - trapif ne v192, user0 - jump block12 - -block12: - v122 = iconst.i32 0 - v123 = ushr_imm v122, 31 - v124 = iadd v122, v123 - v20 -> v124 - v25 = iconst.i32 -19 - v204 = iconst.i32 0 - v31 -> v204 - v210 = ifcmp_imm v31, -1 - trapif ne v210, user0 - jump block18 - -block18: - v215 = iconst.i32 0 - jump block19(v215) - -block19(v32: i32): - v35 = iconst.i32 0 - v218 = ifcmp_imm v35, -1 - trapif ne v218, user0 - jump block21 - -block21: - v223 = iconst.i32 0 - jump block22(v223) - -block22(v36: i32): - v136 = iconst.i32 0 - v40 -> v136 - v227 = ifcmp_imm v136, -1 - trapif ne v227, user0 - jump block24 - -block24: - v232 = iconst.i32 0 - jump block25(v232) - -block25(v41: i32): - v142 = iconst.i32 0 - v45 -> v142 - v236 = ifcmp_imm v142, -1 - trapif ne v236, user0 - jump block27 - -block27: - v241 = iconst.i32 0 - jump block28(v241) - -block28(v46: i32): - v49 = iconst.i32 0 - v244 = ifcmp_imm v49, -1 - trapif ne v244, user0 - jump block30 - -block30: - v254 = iconst.i32 0 - v53 -> v254 - v54 = iconst.i32 -23 - v55 = popcnt v54 - v143 = iconst.i32 0x4de9_bd37 - v260, v144 = x86_smulx v55, v143 - v145 = iconst.i32 0 - v146 = sshr_imm v145, 4 - v147 = iconst.i32 0 - v148 = iadd v146, v147 - v57 -> v148 - v58 = ishl v53, v148 - jump block35 - -block35: - v262 = iconst.i32 0 - v263, v264 = x86_sdivmodx v46, v262, v58 - v59 -> v263 - v270 = iconst.i32 0 - v271, v272 = x86_sdivmodx v41, v270, v59 - v60 -> v271 - v61 = f32const 0.0 - v280 = iconst.i32 0 - v281 = ffcmp v61, v61 - trapff ord v281, user0 - jump block41(v280) - -block41(v62: i32): - v157 = iconst.i32 0 - v158 = sshr_imm v157, 4 - v159 = iconst.i32 0 - v160 = iadd v158, v159 - v75 -> v160 - v308 = ifcmp_imm v160, -1 - trapif ne v308, user0 - jump block52 - -block52: - v87 = iconst.i32 -23 - v88 = iconst.i32 -23 - v89 = popcnt v88 - v161 = iconst.i32 0x4de9_bd37 - v324, v162 = x86_smulx v89, v161 - v163 = isub v162, v89 - v164 = sshr_imm v163, 4 - v165 = iconst.i32 0 - v166 = iadd v164, v165 - v91 -> v166 - v326 = iconst.i32 0 - v327, v328 = x86_sdivmodx v87, v326, v166 - v92 -> v327 - v351 = iconst.i32 0 - v99 -> v351 - v358 = iconst.i32 0 - v359, v360 = x86_sdivmodx v36, v358, v99 - v100 -> v359 - v102 = iconst.i32 0 - v103 = rotr.i32 v32, v102 - v366 = iconst.i32 0 - v367, v368 = x86_sdivmodx v25, v366, v103 - v104 -> v367 - v383 = iconst.i32 0 - v107 -> v383 - v390 = iconst.i32 0 - v391, v392 = x86_sdivmodx v124, v390, v107 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif deleted file mode 100644 index 1aec10354f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif +++ /dev/null @@ -1,173 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123 - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v351 = iconst.i32 0x4de9_bd37 - v31 = iconst.i32 -23 - v35 = iconst.i32 0 - v36 = iconst.i32 -31 - v357 = iconst.i32 0x4de9_bd37 - v530, v358 = x86_smulx v36, v357 - v359 = isub v358, v36 - v360 = sshr_imm v359, 4 - v361 = iconst.i32 0 - v362 = iadd v360, v361 - v38 -> v362 - v532 = sshr_imm v35, 31 - v533, v534 = x86_sdivmodx v35, v532, v362 - v39 -> v533 - v53 = iconst.i32 0 - v547 = 
ifcmp_imm v53, -1 - trapif ne v547, user0 - jump block30 - -block30: - v75 = iconst.i32 0 - v581 = ifcmp_imm v75, -1 - trapif ne v581, user0 - jump block42 - -block42: - v136 = iconst.i32 0 - v691 = ifcmp_imm v136, -1 - trapif ne v691, user0 - jump block81 - -block81: - v158 = iconst.i32 0 - v725 = ifcmp_imm v158, -1 - trapif ne v725, user0 - jump block93 - -block93: - v760 = iconst.i32 0 - jump block106(v760) - -block106(v175: i32): - v179 = iconst.i32 0 - v180 = icmp_imm eq v179, 0 - v183 = iconst.i32 0 - v766 = ifcmp_imm v183, -1 - trapif ne v766, user0 - jump block108 - -block108: - v771 = iconst.i32 0 - jump block109(v771) - -block109(v184: i32): - v785 = iconst.i32 0 - v193 -> v785 - v791 = ifcmp_imm v193, -1 - trapif ne v791, user0 - jump block117 - -block117: - v796 = iconst.i32 0 - jump block118(v796) - -block118(v194: i32): - v203 = iconst.i32 -63 - v809 = iconst.i32 0 - v207 -> v809 - v815 = ifcmp_imm v207, -1 - trapif ne v815, user0 - jump block126 - -block126: - v209 = iconst.i32 0 - v823 = ifcmp_imm v209, -1 - trapif ne v823, user0 - jump block129 - -block129: - v213 = iconst.i32 -23 - v214 = iconst.i32 -19 - v215 = icmp_imm eq v214, 0 - v216 = bint.i32 v215 - v217 = popcnt v216 - v435 = iconst.i32 0x7df7_df7d - v831, v436 = x86_smulx v217, v435 - v437 = isub v436, v217 - v438 = sshr_imm v437, 5 - v439 = ushr_imm v438, 31 - v440 = iadd v438, v439 - v219 -> v440 - v220 = rotr v213, v440 - v229 = iconst.i32 0 - v841 = iconst.i32 0 - v842, v843 = x86_sdivmodx v194, v841, v229 - v230 -> v842 - v849 = iconst.i32 0 - v850, v851 = x86_sdivmodx v184, v849, v230 - v231 -> v850 - v232 = iconst.i32 0 - v857 = iconst.i32 0 - v858, v859 = x86_sdivmodx v175, v857, v232 - v233 -> v858 - v915 = iconst.i32 0 - jump block163(v915) - -block163(v253: i32): - v255 = iconst.i32 0 - v256 = iconst.i32 -23 - v257 = iconst.i32 -19 - v258 = icmp_imm eq v257, 0 - v259 = bint.i32 v258 - v260 = popcnt v259 - v447 = iconst.i32 0x7df7_df7d - v921, v448 = x86_smulx v260, v447 - v449 = isub v448, v260 - v450 = sshr_imm v449, 5 - v451 = ushr_imm v450, 31 - v452 = iadd v450, v451 - v262 -> v452 - v263 = rotr v256, v452 - v264 = popcnt v263 - v265 = popcnt v264 - v266 = popcnt v265 - v267 = rotr v255, v266 - v268 = popcnt v267 - v923 = iconst.i32 0 - v924, v925 = x86_sdivmodx v253, v923, v268 - v269 -> v924 - v276 = iconst.i32 0 - v277 = iconst.i32 -63 - v278 = popcnt v277 - v947 = iconst.i32 0 - v948, v949 = x86_sdivmodx v276, v947, v278 - v279 -> v948 - v309 = iconst.i32 0 - v310 = iconst.i32 0 - v311 = iconst.i32 0 - v312 = icmp_imm eq v311, 0 - v313 = bint.i32 v312 - v314 = rotr v310, v313 - v315 = iconst.i32 -31 - v464 = iconst.i32 0 - v1020, v465 = x86_smulx v315, v464 - v466 = isub v465, v315 - v467 = sshr_imm v466, 4 - v468 = iconst.i32 0 - v469 = iadd v467, v468 - v317 -> v469 - v1022 = iconst.i32 0 - v1023, v1024 = x86_sdivmodx v314, v1022, v469 - v318 -> v1023 - v320 = iconst.i32 0 - v321 = iconst.i32 -19 - v322 = popcnt v321 - v1030 = iconst.i32 0 - v1031, v1032 = x86_sdivmodx v320, v1030, v322 - v323 -> v1031 - v1047 = iconst.i32 0 - v325 -> v1047 - v1054 = sshr_imm v309, 31 - v1055, v1056 = x86_sdivmodx v309, v1054, v325 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif deleted file mode 100644 index e3540f6a59..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill-noregs.clif +++ /dev/null @@ -1,175 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test case found by the Binaryen fuzzer. 
-; -; The spiller panics with a -; 'Ran out of GPR registers when inserting copy before v68 = icmp.i32 eq v66, v67', -; cranelift-codegen/src/regalloc/spilling.rs:425:28 message. -; -; The process_reg_uses() function is trying to insert a copy before the icmp instruction in block4 -; and runs out of registers to spill. Note that block7 has a lot of dead parameter values. -; -; The spiller was not releasing register pressure for dead block parameters. - -function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v2 = iconst.i32 0 - v3 = iconst.i64 0 - v4 = iconst.i32 0xffff_ffff_bb3f_4a2c - brz v4, block5 - jump block1 - -block1: - v5 = iconst.i32 0 - v6 = copy.i64 v3 - v7 = copy.i64 v3 - v8 = copy.i64 v3 - v9 = copy.i64 v3 - v10 = copy.i64 v3 - v11 = copy.i64 v3 - v12 = copy.i64 v3 - v13 = copy.i64 v3 - v14 = copy.i64 v3 - v15 = copy.i64 v3 - v16 = copy.i64 v3 - brnz v5, block4(v2, v3, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) - jump block2 - -block2: - v17 = iconst.i32 0 - v18 = copy.i64 v3 - v19 = copy.i64 v3 - v20 = copy.i64 v3 - v21 = copy.i64 v3 - v22 = copy.i64 v3 - v23 = copy.i64 v3 - v24 = copy.i64 v3 - v25 = copy.i64 v3 - v26 = copy.i64 v3 - v27 = copy.i64 v3 - v28 = copy.i64 v3 - brnz v17, block4(v2, v3, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) - jump block3 - -block3: - jump block1 - -block4(v29: i32, v30: i64, v31: i64, v32: i64, v33: i64, v34: i64, v35: i64, v36: i64, v37: i64, v38: i64, v39: i64, v40: i64, v41: i64): - jump block7(v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) - -block5: - jump block6 - -block6: - v42 = copy.i64 v3 - v43 = copy.i64 v3 - v44 = copy.i64 v3 - v45 = copy.i64 v3 - v46 = copy.i64 v3 - v47 = copy.i64 v3 - v48 = copy.i64 v3 - v49 = copy.i64 v3 - v50 = copy.i64 v3 - v51 = copy.i64 v3 - v52 = copy.i64 v3 - jump block7(v2, v3, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) - -block7(v53: i32, v54: i64, v55: i64, v56: i64, v57: i64, v58: i64, v59: i64, v60: i64, v61: i64, v62: i64, v63: i64, v64: i64, v65: i64): - v66 = iconst.i32 0 - v67 = iconst.i32 0 - v68 = icmp eq v66, v67 - v69 = bint.i32 v68 - jump block8 - -block8: - jump block9 - -block9: - v70 = iconst.i32 0xffff_ffff_ffff_912f - brz v70, block10 - jump block35 - -block10: - v71 = iconst.i32 0 - brz v71, block11 - jump block27 - -block11: - jump block12 - -block12: - jump block13 - -block13: - jump block14 - -block14: - jump block15 - -block15: - jump block16 - -block16: - jump block17 - -block17: - jump block18 - -block18: - jump block19 - -block19: - jump block20 - -block20: - jump block21 - -block21: - jump block22 - -block22: - jump block23 - -block23: - jump block24 - -block24: - jump block25 - -block25: - jump block26 - -block26: - jump block27 - -block27: - jump block28 - -block28: - jump block29 - -block29: - jump block30 - -block30: - jump block31 - -block31: - jump block32 - -block32: - jump block33 - -block33: - jump block34 - -block34: - jump block35 - -block35: - jump block36 - -block36: - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif deleted file mode 100644 index 2a3f2ad959..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill.clif +++ /dev/null @@ -1,223 +0,0 @@ -test regalloc - -; Test the spiler on an ISA with few registers. -; RV32E has 16 registers, where: -; - %x0 is hardwired to zero. -; - %x1 is the return address. -; - %x2 is the stack pointer. -; - %x3 is the global pointer. 
-; - %x4 is the thread pointer. -; - %x10-%x15 are function arguments. -; -; regex: V=v\d+ -; regex: WS=\s+ - -target riscv32 legacy enable_e - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. The first computed value, v2 -function %pyramid(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -; not: spill_slot -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - ; not: spill - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - v33 = iadd v13, v14 - ; check: iadd v13 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - ; check: $(r2v2=$V) = fill v2 - ; check: v22 = iadd v23, $r2v2 - v21 = iadd v22, v1 - ; check: $(r2v1=$V) = fill v1 - ; check: v21 = iadd v22, $r2v1 - ; check: $(rlink2=$V) = fill $link - return v21 - ; check: return v21, $rlink2 -} - -; All values live across a call must be spilled -function %across_call(i32) { - fn0 = %foo(i32) -block0(v1: i32): - ; check: v1 = spill - call fn0(v1) - ; check: call fn0 - call fn0(v1) - ; check: fill v1 - ; check: call fn0 - return -} - -; The same value used for two function arguments. -function %doubleuse(i32) { - fn0 = %xx(i32, i32) -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call fn0(v0, v0) - ; check: call fn0(v0, $c) - return -} - -; The same value used as indirect callee and argument. -function %doubleuse_icall1(i32) { - sig0 = (i32) system_v -block0(v0: i32): - ; not:copy - call_indirect sig0, v0(v0) - return -} - -; The same value used as indirect callee and two arguments. -function %doubleuse_icall2(i32) { - sig0 = (i32, i32) system_v -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call_indirect sig0, v0(v0, v0) - ; check: call_indirect sig0, v0(v0, $c) - return -} - -; Two arguments on the stack. -function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; check: ss1 = incoming_arg 4, offset 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32): - ; unordered: fill v6 - ; unordered: fill v7 - v10 = iadd v6, v7 - return v10 -} - -; More block arguments than registers. -function %blockargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - jump block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; Spilling a block argument to make room for a branch operand. 
-function %brargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - brnz v1, block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - jump block2 - -block2: - return v1 - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. The first computed value, v2 -function %use_spilled_value(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - - ; Here we have maximum register pressure, and v2 has been spilled. - ; What happens if we use it? - v33 = iadd v2, v14 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - v21 = iadd v22, v1 - v20 = iadd v21, v13 - v19 = iadd v20, v2 - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif deleted file mode 100644 index 219a299880..0000000000 --- a/cranelift/filetests/filetests/regalloc/unreachable_code.clif +++ /dev/null @@ -1,47 +0,0 @@ -; Use "test compile" here otherwise the dead blocks won't be eliminated. -test compile - -set enable_probestack=0 -target x86_64 legacy haswell - -; This function contains unreachable blocks which trip up the register -; allocator if they don't get cleared out. -function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v { -block0(v0: i64): - v1 = iconst.i32 0 - v2 = iconst.i32 0 - jump block2 - -block2: - jump block4 - -block4: - jump block2 - -; Everything below this point is unreachable. - -block3(v3: i32): - v5 = iadd.i32 v2, v3 - jump block6 - -block6: - jump block6 - -block7(v6: i32): - v7 = iadd.i32 v5, v6 - jump block8 - -block8: - jump block10 - -block10: - jump block8 - -block9(v8: i32): - v10 = iadd.i32 v7, v8 - jump block1(v10) - -block1(v11: i32): - return v11 -} - diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif deleted file mode 100644 index 935b33c5b7..0000000000 --- a/cranelift/filetests/filetests/regalloc/x86-regres.clif +++ /dev/null @@ -1,49 +0,0 @@ -test regalloc -target i686 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -; The value v9 appears both as the branch control and one of the block arguments -; in the brnz instruction in block2. 
It also happens that v7 and v9 are assigned -; to the same register, so v9 doesn't need to be moved before the brnz. -; -; This ended up confusong the constraint solver which had not made a record of -; the fixed register assignment for v9 since it was already in the correct -; register. -function %pr147(i32) -> i32 system_v { -block0(v0: i32): - v1 = iconst.i32 0 - v2 = iconst.i32 1 - v3 = iconst.i32 0 - jump block2(v3, v2, v0) - - ; check: $(splitEdge=$BB): - ; check: jump block2($V, $V, v9) - -block2(v4: i32, v5: i32, v7: i32): - ; check: block2 - v6 = iadd v4, v5 - v8 = iconst.i32 -1 - ; v7 is killed here and v9 gets the same register. - v9 = iadd v7, v8 - ; check: v9 = iadd v7, v8 - ; Here v9 the brnz control appears to interfere with v9 the block argument, - ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The - ; add_var sanity checks got confused when no fixed assignment could be - ; found for v9. - ; - ; We should be able to handle this situation without making copies of v9. - brnz v9, block2(v5, v6, v9) - ; check: brnz v9, $splitEdge - jump block3 - -block3: - return v5 -} - -function %select_i64(i64, i64, i32) -> i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = select v2, v0, v1 - return v3 -} diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif deleted file mode 100644 index fd95cc2f4c..0000000000 --- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif +++ /dev/null @@ -1,57 +0,0 @@ -test compile -target aarch64 -target x86_64 legacy - -; This checks that code shrink is allowed while relaxing code, when code shrink -; has not run. - -function u0:0(i64, i64) -> i64 system_v { - ss1 = explicit_slot 8 - sig0 = (i64) -> i64 system_v - fn0 = u0:8 sig0 - -block0(v0: i64, v1: i64): - v3 = stack_addr.i64 ss1 - v5 = call fn0(v1) - v6 = iconst.i64 0 - v8 = iconst.i64 0 - jump block3(v6, v1, v8) - -block3(v39: i64, v40: i64, v42: i64): - v9 = load.i64 v3 - v11 = icmp_imm ugt v9, 1 - v12 = bint.i8 v11 - v13 = uextend.i32 v12 - v14 = icmp_imm eq v13, 0 - brnz v14, block4 - jump block5 - -block4: - v18 = icmp_imm.i64 eq v40, 0 - v19 = bint.i8 v18 - v20 = uextend.i32 v19 - brz v20, block6 - jump block7 - -block7: - trap user0 - -block5: - v22 = iconst.i32 1 - v23 = ishl.i64 v39, v22 - v25 = iconst.i64 1 - v26 = band.i64 v42, v25 - v27 = bor v23, v26 - v28 = iconst.i32 1 - v29 = ushr.i64 v42, v28 - v30 = load.i64 v3 - v31 = iconst.i32 1 - v32 = ushr v30, v31 - store v32, v3 - jump block3(v27, v40, v29) - -block6: - v38 = iconst.i64 0 - return v38 -} - diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif deleted file mode 100644 index 47acf2ad72..0000000000 --- a/cranelift/filetests/filetests/safepoint/basic.clif +++ /dev/null @@ -1,71 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %test(i32, r64, r64) -> r64 { - block0(v0: i32, v1:r64, v2:r64): - jump block1(v0) - block1(v3: i32): - v4 = irsub_imm v3, 1 - jump block2(v4) - block2(v5: i32): - resumable_trap interrupt - brz v5, block1(v5) - jump block3 - block3: - v6 = null.r64 - v7 = is_null v6 - brnz v7, block2(v0) - jump block4 - block4: - brnz v0, block5 - jump block6 - block5: - return v1 - block6: - return v2 -} - -; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { -; nextln: block0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): -; nextln: v10 = copy v0 -; nextln: jump 
block1(v10) -; nextln: -; nextln: block7: -; nextln: regmove.i32 v5, %rcx -> %rax -; nextln: jump block1(v5) -; nextln: -; nextln: block1(v3: i32 [%rax]): -; nextln: v8 = iconst.i32 1 -; nextln: v4 = isub v8, v3 -; nextln: jump block2(v4) -; nextln: -; nextln: block8: -; nextln: v9 = copy.i32 v0 -; nextln: regmove v9, %rax -> %rcx -; nextln: jump block2(v9) -; nextln: -; nextln: block2(v5: i32 [%rcx]): -; nextln: safepoint v1, v2 -; nextln: resumable_trap interrupt -; nextln: brz v5, block7 -; nextln: jump block3 -; nextln: -; nextln: block3: -; nextln: v6 = null.r64 -; nextln: v7 = is_null v6 -; nextln: brnz v7, block8 -; nextln: jump block4 -; nextln: -; nextln: block4: -; nextln: brnz.i32 v0, block5 -; nextln: jump block6 -; nextln: -; nextln: block5: -; nextln: regmove.r64 v1, %rsi -> %rax -; nextln: return v1 -; nextln: -; nextln: block6: -; nextln: regmove.r64 v2, %rdx -> %rax -; nextln: return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif deleted file mode 100644 index ffcf41fb46..0000000000 --- a/cranelift/filetests/filetests/safepoint/call.clif +++ /dev/null @@ -1,58 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %direct() -> r64 { - fn0 = %none() - fn1 = %one() -> r64 - fn2 = %two() -> i32, r64 - -block0: - call fn0() - v1 = call fn1() - v2, v3 = call fn2() - brz v2, block2 - jump block1 -block1: - return v1 -block2: - v4 = call fn1() - return v3 -} - -; sameln: function %direct() -> r64 [%rax] fast { -; nextln: ss0 = spill_slot 8 -; nextln: ss1 = spill_slot 8 -; nextln: sig0 = () fast -; nextln: sig1 = () -> r64 [%rax] fast -; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast -; nextln: fn0 = %none sig0 -; nextln: fn1 = %one sig1 -; nextln: fn2 = %two sig2 -; nextln: -; nextln: block0: -; nextln: v5 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v5() -; nextln: v6 = func_addr.i64 fn1 -; nextln: v9 = call_indirect sig1, v6() -; nextln: v1 = spill v9 -; nextln: v7 = func_addr.i64 fn2 -; nextln: safepoint v1 -; nextln: v2, v10 = call_indirect sig2, v7() -; nextln: v3 = spill v10 -; nextln: brz v2, block2 -; nextln: jump block1 -; nextln: -; nextln: block1: -; nextln: v11 = fill.r64 v1 -; nextln: regmove v11, %r15 -> %rax -; nextln: return v11 -; nextln: -; nextln: block2: -; nextln: v8 = func_addr.i64 fn1 -; nextln: safepoint v3 -; nextln: v4 = call_indirect sig1, v8() -; nextln: v12 = fill.r64 v3 -; nextln: regmove v12, %r15 -> %rax -; nextln: return v12 -; nextln: } diff --git a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif deleted file mode 100644 index 5d10588da3..0000000000 --- a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif +++ /dev/null @@ -1,18 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with -;; its `v2` operand's instruction because `v2` is one of many results. Instead, -;; we need to make an alias `v3 -> v2`. 
- -function %replace_inst_with_alias() -> i32 { -block0: - v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 - v3 = isub v2, v0 - ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 - ; nextln: v3 -> v2 - return v3 -} diff --git a/cranelift/filetests/filetests/stack_maps/call.clif b/cranelift/filetests/filetests/stack_maps/call.clif deleted file mode 100644 index 6563ad450a..0000000000 --- a/cranelift/filetests/filetests/stack_maps/call.clif +++ /dev/null @@ -1,103 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -function %icall_fast(r64) -> r64 fast { -; check: function %icall_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_sys_v(r64) -> r64 system_v { -; check: function %icall_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_fastcall(r64) -> r64 windows_fastcall { -; check: function %icall_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] - -function %call_fast(r64) -> r64 fast { -; check: function %call_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %call_sys_v(r64) -> r64 system_v { -; check: function %call_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %call_fastcall(r64) -> r64 windows_fastcall { -; check: function %call_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] diff --git a/cranelift/filetests/filetests/stack_maps/incoming_args.clif b/cranelift/filetests/filetests/stack_maps/incoming_args.clif deleted file mode 100644 index e8231c3aad..0000000000 --- a/cranelift/filetests/filetests/stack_maps/incoming_args.clif +++ /dev/null @@ -1,30 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -;; Incoming args get included in stack maps. 
- -function %incoming_args(r64, r64, r64, r64, r64) -> r64 windows_fastcall { -; check: r64 [32] -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - - fn0 = %none() -; nextln: sig0 = () fast -; nextln: fn0 = %none sig0 - -block0(v0: r64, v1: r64, v2: r64, v3: r64, v4: r64): -; check: v4: r64 [ss0] - - call fn0() -; check: safepoint v4 -; nextln: call_indirect - return v4 -} - -; check: Stack maps: -; nextln: -; nextln: safepoint v4 -; nextln: - mapped words: 13 -; nextln: - live: [12] diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif deleted file mode 100644 index 088523d24a..0000000000 --- a/cranelift/filetests/filetests/verifier/flags.clif +++ /dev/null @@ -1,77 +0,0 @@ -test verifier -target aarch64 -target i686 - -; Simple, correct use of CPU flags. -function %simple(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -; Overlapping flag values of different types. -function %overlap(i32, f32) -> i32 { - block0(v0: i32, v1: f32): - [DynRexOp1rcmp#39] v2 = ifcmp v0, v0 - [Op2fcmp#42e] v3 = ffcmp v1, v1 - [Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3 - [Op2seti_abcd#490] v5 = trueif ugt v2 - [Op1rr#21] v6 = band v4, v5 - [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6 - [Op1ret#c3] return v7 -} - -; CPU flags clobbered by arithmetic. -function %clobbered(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [DynRexOp1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; CPU flags not clobbered by load. -function %live_across_load(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1ld#8b] v2 = load.i32 v0 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; Correct use of CPU flags across block. -function %live_across_block(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -function %live_across_block_backwards(i32) -> i32 { - block0(v0: i32): - [Op1jmpb#eb] jump block2 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 - block2: - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 -} - -; Flags live into loop. -function %live_into_loop(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op1jmpb#eb] jump block1 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif deleted file mode 100644 index f41f867918..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-b1.clif +++ /dev/null @@ -1,68 +0,0 @@ -test compile -target x86_64 legacy haswell - -;; `b1` return values need to be legalized into bytes so that they can be stored -;; in memory. 
- -function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 { -;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast { - -block0(v0: b1, v1: b1, v2: b1, v3: b1): -; check: block0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]): - - return v0, v1, v2, v3 - ; check: v5 = bint.i8 v0 - ; nextln: v9 = uextend.i32 v5 - ; nextln: istore8 notrap aligned v9, v4 - ; nextln: v6 = bint.i8 v1 - ; nextln: v10 = uextend.i32 v6 - ; nextln: istore8 notrap aligned v10, v4+1 - ; nextln: v7 = bint.i8 v2 - ; nextln: v11 = uextend.i32 v7 - ; nextln: istore8 notrap aligned v11, v4+2 - ; nextln: v8 = bint.i8 v3 - ; nextln: v12 = uextend.i32 v8 - ; nextln: istore8 notrap aligned v12, v4+3 -} - -function %call_4_b1s() { -; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast { -; nextln: ss0 = sret_slot 4, offset -28 - - fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 - ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0: -; check: block0(v26: i64 [%rbp], v27: i64 [%rbx]): - - v0 = bconst.b1 true - v1 = bconst.b1 false - v2 = bconst.b1 true - v3 = bconst.b1 false - - ; check: v8 = stack_addr.i64 ss0 - v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) - ; check: v9 = call fn0(v0, v1, v2, v3, v8) - ; nextln: v22 = uload8.i32 notrap aligned v9 - ; nextln: v10 = ireduce.i8 v22 - ; nextln: v11 = raw_bitcast.b8 v10 - ; nextln: v12 = breduce.b1 v11 - ; nextln: v4 -> v12 - ; nextln: v23 = uload8.i32 notrap aligned v9+1 - ; nextln: v13 = ireduce.i8 v23 - ; nextln: v14 = raw_bitcast.b8 v13 - ; nextln: v15 = breduce.b1 v14 - ; nextln: v5 -> v15 - ; nextln: v24 = uload8.i32 notrap aligned v9+2 - ; nextln: v16 = ireduce.i8 v24 - ; nextln: v17 = raw_bitcast.b8 v16 - ; nextln: v18 = breduce.b1 v17 - ; nextln: v6 -> v18 - ; nextln: v25 = uload8.i32 notrap aligned v9+3 - ; nextln: v19 = ireduce.i8 v25 - ; nextln: v20 = raw_bitcast.b8 v19 - ; nextln: v21 = breduce.b1 v20 - ; nextln: v7 -> v21 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif deleted file mode 100644 index 06d0814dfb..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif +++ /dev/null @@ -1,26 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Indirect calls with many returns. 
- -function %call_indirect_many_rets(i64) { - ; check: ss0 = sret_slot 32 - - sig0 = () -> i64, i64, i64, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0(v0: i64): - v1, v2, v3, v4 = call_indirect sig0, v0() - ; check: v5 = stack_addr.i64 ss0 - ; nextln: v6 = call_indirect sig0, v0(v5) - ; nextln: v7 = load.i64 notrap aligned v6 - ; nextln: v1 -> v7 - ; nextln: v8 = load.i64 notrap aligned v6+8 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 notrap aligned v6+16 - ; nextln: v3 -> v9 - ; nextln: v10 = load.i64 notrap aligned v6+24 - ; nextln: v4 -> v10 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif deleted file mode 100644 index aae733ddf4..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif +++ /dev/null @@ -1,24 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test if arguments are legalized if function uses sret - -function %call_indirect_with_split_arg(i64, i64, i64) { - ; check: ss0 = sret_slot 32 - sig0 = (i128) -> i64, i64, i64, i64 - ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4, v5, v6, v7 = call_indirect sig0, v0(v3) - ; check: v8 = stack_addr.i64 ss0 - ; check: v9 = call_indirect sig0, v0(v1, v2, v8) - ; check: v10 = load.i64 notrap aligned v9 - ; check: v4 -> v10 - ; check: v11 = load.i64 notrap aligned v9+8 - ; check: v5 -> v11 - ; check: v12 = load.i64 notrap aligned v9+16 - ; check: v6 -> v12 - ; check: v13 = load.i64 notrap aligned v9+24 - ; check: v7 -> v13 - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif deleted file mode 100644 index c58102aedc..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif +++ /dev/null @@ -1,61 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test that we don't reuse `sret` stack slots for multiple calls. We could do -;; this one day, but it would require some care to ensure that we don't have -;; subsequent calls overwrite the results of previous calls. 
- -function %foo() -> i32, f32 { - ; check: ss0 = sret_slot 20 - ; nextln: ss1 = sret_slot 20 - - fn0 = %f() -> i32, i32, i32, i32, i32 - fn1 = %g() -> f32, f32, f32, f32, f32 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %f sig0 - ; nextln: fn1 = %g sig1 - -block0: - v0, v1, v2, v3, v4 = call fn0() - ; check: v18 = stack_addr.i64 ss0 - ; nextln: v25 = func_addr.i64 fn0 - ; nextln: v19 = call_indirect sig0, v25(v18) - ; nextln: v20 = load.i32 notrap aligned v19 - ; nextln: v0 -> v20 - ; nextln: v21 = load.i32 notrap aligned v19+4 - ; nextln: v1 -> v21 - ; nextln: v22 = load.i32 notrap aligned v19+8 - ; nextln: v2 -> v22 - ; nextln: v23 = load.i32 notrap aligned v19+12 - ; nextln: v3 -> v23 - ; nextln: v24 = load.i32 notrap aligned v19+16 - ; nextln: v4 -> v24 - - v5, v6, v7, v8, v9 = call fn1() - ; check: v26 = stack_addr.i64 ss1 - ; nextln: v33 = func_addr.i64 fn1 - ; nextln: v27 = call_indirect sig1, v33(v26) - ; nextln: v28 = load.f32 notrap aligned v27 - ; nextln: v5 -> v28 - ; nextln: v29 = load.f32 notrap aligned v27+4 - ; nextln: v6 -> v29 - ; nextln: v30 = load.f32 notrap aligned v27+8 - ; nextln: v7 -> v30 - ; nextln: v31 = load.f32 notrap aligned v27+12 - ; nextln: v8 -> v31 - ; nextln: v32 = load.f32 notrap aligned v27+16 - ; nextln: v9 -> v32 - - v10 = iadd v0, v1 - v11 = iadd v2, v3 - v12 = iadd v10, v11 - v13 = iadd v12, v4 - - v14 = fadd v5, v6 - v15 = fadd v7, v8 - v16 = fadd v14, v15 - v17 = fadd v16, v9 - - return v13, v17 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif deleted file mode 100644 index da9f25ed97..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif +++ /dev/null @@ -1,51 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Need to insert padding after the `i8`s so that the `i32` and `i64` are -;; aligned. 
- -function %returner() -> i8, i32, i8, i64 { -; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast { - -block0: -; check: block0(v4: i64): - - v0 = iconst.i8 0 - v1 = iconst.i32 1 - v2 = iconst.i8 2 - v3 = iconst.i64 3 - return v0, v1, v2, v3 - ; check: v6 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v6, v4 - ; nextln: store notrap aligned v1, v4+4 - ; nextln: v7 = uextend.i32 v2 - ; nextln: istore8 notrap aligned v7, v4+8 - ; nextln: store notrap aligned v3, v4+16 - ; nextln: return v4 -} - -function %caller() { - ; check: ss0 = sret_slot 24 - - fn0 = %returner() -> i8, i32, i8, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %returner sig0 - -block0: - v0, v1, v2, v3 = call fn0() - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v10 = func_addr.i64 fn0 - ; nextln: v5 = call_indirect sig0, v10(v4) - ; nextln: v11 = uload8.i32 notrap aligned v5 - ; nextln: v6 = ireduce.i8 v11 - ; nextln: v0 -> v6 - ; nextln: v7 = load.i32 notrap aligned v5+4 - ; nextln: v1 -> v7 - ; nextln: v12 = uload8.i32 notrap aligned v5+8 - ; nextln: v8 = ireduce.i8 v12 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 notrap aligned v5+16 - ; nextln: v3 -> v9 - - return -} diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index d764b916e5..6a7fb5a282 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -47,8 +47,8 @@ impl SingleFunctionCompiler { } /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags. - pub fn with_host_isa(flags: settings::Flags, variant: BackendVariant) -> Self { - let builder = builder_with_options(variant, true) + pub fn with_host_isa(flags: settings::Flags) -> Self { + let builder = builder_with_options(true) .expect("Unable to build a TargetIsa for the current host"); let isa = builder.finish(flags); Self::new(isa) @@ -58,7 +58,7 @@ impl SingleFunctionCompiler { /// ISA. pub fn with_default_host_isa() -> Self { let flags = settings::Flags::new(settings::builder()); - Self::with_host_isa(flags, BackendVariant::Any) + Self::with_host_isa(flags) } /// Compile the passed [Function] to a `CompiledFunction`. This function will: diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 4b9e528cfd..86b346e21b 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -48,11 +48,10 @@ impl SubTest for TestRun { ); return Ok(()); } - let variant = context.isa.unwrap().variant(); let test_env = RuntestEnvironment::parse(&context.details.comments[..])?; - let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone(), variant); + let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone()); for comment in context.details.comments.iter() { if let Some(command) = parse_run_command(comment.text, &func.signature)? 
{ trace!("Parsed run command: {}", command); diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index a82be29ace..93c6bf0a3f 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -1033,44 +1033,6 @@ where } Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), - // TODO: these instructions should be removed once the new backend makes these obsolete - // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the - // "all-arch" feature for cranelift-codegen would become unnecessary for this crate. - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Vcvtudq2ps - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pblendw - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => unimplemented!("x86 instruction: {}", inst.opcode()), Opcode::JumpTableBase | Opcode::JumpTableEntry | Opcode::IndirectJumpTableBr => { unimplemented!("Legacy instruction: {}", inst.opcode()) } diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index b2364c6ad9..c2a5aa78b8 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -30,7 +30,7 @@ use target_lexicon::Triple; /// machine, or `Err(())` if the host machine is not supported /// in the current configuration. pub fn builder() -> Result { - builder_with_options(isa::BackendVariant::Any, true) + builder_with_options(true) } /// Return an `isa` builder configured for the current host @@ -40,17 +40,11 @@ pub fn builder() -> Result { /// Selects the given backend variant specifically; this is /// useful when more than oen backend exists for a given target /// (e.g., on x86-64). 
-pub fn builder_with_options( - variant: isa::BackendVariant, - infer_native_flags: bool, -) -> Result { - let mut isa_builder = - isa::lookup_variant(Triple::host(), variant).map_err(|err| match err { - isa::LookupError::SupportDisabled => { - "support for architecture disabled at compile time" - } - isa::LookupError::Unsupported => "unsupported architecture", - })?; +pub fn builder_with_options(infer_native_flags: bool) -> Result { + let mut isa_builder = isa::lookup_variant(Triple::host()).map_err(|err| match err { + isa::LookupError::SupportDisabled => "support for architecture disabled at compile time", + isa::LookupError::Unsupported => "unsupported architecture", + })?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index a8f20230fc..98c46ab2e3 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -22,7 +22,7 @@ use cranelift_codegen::ir::{ HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc, }; -use cranelift_codegen::isa::{self, BackendVariant, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, settings::Configurable, timing}; use smallvec::SmallVec; @@ -1159,19 +1159,7 @@ impl<'a> Parser<'a> { Ok(triple) => triple, Err(err) => return err!(loc, err), }; - // Look for `machinst` or `legacy` option before instantiating IsaBuilder. - let variant = match words.peek() { - Some(&"machinst") => { - words.next(); - BackendVariant::MachInst - } - Some(&"legacy") => { - words.next(); - BackendVariant::Legacy - } - _ => BackendVariant::Any, - }; - let mut isa_builder = match isa::lookup_variant(triple, variant) { + let mut isa_builder = match isa::lookup_variant(triple) { Err(isa::LookupError::SupportDisabled) => { continue; } diff --git a/cranelift/tests/bugpoint_test.clif b/cranelift/tests/bugpoint_test.clif index b2e9acc37e..ced5b9e809 100644 --- a/cranelift/tests/bugpoint_test.clif +++ b/cranelift/tests/bugpoint_test.clif @@ -300,7 +300,8 @@ block0(v0: i64, v1: i64, v2: i64): v241 -> v1 v256 -> v1 v262 -> v1 - v3, v4 = x86_sdivmodx v0, v1, v2 + v3 = imul v0, v1 + v4 = imul v1, v2 store aligned v4, v3 v5 = load.i64 aligned v2+8 store aligned v5, v3+8 diff --git a/crates/bench-api/Cargo.toml b/crates/bench-api/Cargo.toml index a67b48c501..2edf20800c 100644 --- a/crates/bench-api/Cargo.toml +++ b/crates/bench-api/Cargo.toml @@ -31,4 +31,3 @@ wat = "1.0" default = ["shuffling-allocator"] wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] -old-x86-backend = ["wasmtime/old-x86-backend"] diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 49a9069dc1..331f660f79 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -27,4 +27,3 @@ thiserror = "1.0.4" [features] all-arch = ["cranelift-codegen/all-arch"] -old-x86-backend = ["cranelift-codegen/old-x86-backend"] diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index badd61a737..b38205bfeb 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -71,9 +71,6 @@ parallel-compilation = ["rayon"] # Enables support for automatic cache configuration to be enabled in `Config`. cache = ["wasmtime-cache"] -# Use Cranelift's old x86 backend. 
-old-x86-backend = ["wasmtime-cranelift/old-x86-backend"] - # Enables support for "async stores" as well as defining host functions as # `async fn` and calling functions asynchronously. async = ["wasmtime-fiber", "wasmtime-runtime/async"] diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 5615d438c2..dd8877e8f9 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1129,9 +1129,7 @@ impl Func { /// and similarly if a function has multiple results you can bind that too /// /// ``` - /// # #[cfg(not(feature = "old-x86-backend"))] /// # use wasmtime::*; - /// # #[cfg(not(feature = "old-x86-backend"))] /// # fn foo(add_with_overflow: &Func, mut store: Store<()>) -> anyhow::Result<()> { /// let typed = add_with_overflow.typed::<(u32, u32), (u32, i32), _>(&store)?; /// let (result, overflow) = typed.call(&mut store, (u32::max_value(), 2))?; @@ -1564,12 +1562,10 @@ macro_rules! impl_host_abi { #[doc(hidden)] #[allow(non_snake_case)] #[repr(C)] - #[cfg(not(feature = "old-x86-backend"))] pub struct []<$($u,)*> { $($u: $u,)* } - #[cfg(not(feature = "old-x86-backend"))] #[allow(non_snake_case, unused_assignments)] impl<$t: Copy, $($u: Copy,)*> HostAbi for ($t, $($u,)*) { type Abi = $t; diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 89f851c488..2c5e05df89 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -134,54 +134,7 @@ impl ModuleInfo for RegisteredModule { // Because we know we are in Wasm code, and we must be at some kind // of call/safepoint, then the Cranelift backend must have avoided // emitting a stack map for this location because no refs were live. - #[cfg(not(feature = "old-x86-backend"))] Err(_) => return None, - - // ### Old x86_64 backend specific code. - // - // Because GC safepoints are technically only associated with a - // single PC, we should ideally only care about `Ok(index)` values - // returned from the binary search. However, safepoints are inserted - // right before calls, and there are two things that can disturb the - // PC/offset associated with the safepoint versus the PC we actually - // use to query for the stack map: - // - // 1. The `backtrace` crate gives us the PC in a frame that will be - // *returned to*, and where execution will continue from, rather than - // the PC of the call we are currently at. So we would need to - // disassemble one instruction backwards to query the actual PC for - // the stack map. - // - // TODO: One thing we *could* do to make this a little less error - // prone, would be to assert/check that the nearest GC safepoint - // found is within `max_encoded_size(any kind of call instruction)` - // our queried PC for the target architecture. - // - // 2. Cranelift's stack maps only handle the stack, not - // registers. However, some references that are arguments to a call - // may need to be in registers. In these cases, what Cranelift will - // do is: - // - // a. spill all the live references, - // b. insert a GC safepoint for those references, - // c. reload the references into registers, and finally - // d. make the call. - // - // Step (c) adds drift between the GC safepoint and the location of - // the call, which is where we actually walk the stack frame and - // collect its live references. - // - // Luckily, the spill stack slots for the live references are still - // up to date, so we can still find all the on-stack roots. 
- // Furthermore, we do not have a moving GC, so we don't need to worry - // whether the following code will reuse the references in registers - // (which would not have been updated to point to the moved objects) - // or reload from the stack slots (which would have been updated to - // point to the moved objects). - #[cfg(feature = "old-x86-backend")] - Err(0) => return None, - #[cfg(feature = "old-x86-backend")] - Err(i) => i - 1, }; Some(&info.stack_maps[index].stack_map) diff --git a/examples/multi.rs b/examples/multi.rs index df36671ceb..b243d83cd7 100644 --- a/examples/multi.rs +++ b/examples/multi.rs @@ -9,7 +9,6 @@ use anyhow::Result; -#[cfg(not(feature = "old-x86-backend"))] fn main() -> Result<()> { use wasmtime::*; @@ -63,11 +62,3 @@ fn main() -> Result<()> { Ok(()) } - -// Note that this example is not supported in the off-by-default feature of the -// old x86 compiler backend for Cranelift. Wasmtime's default configuration -// supports this example, however. -#[cfg(feature = "old-x86-backend")] -fn main() -> Result<()> { - Ok(()) -} diff --git a/tests/all/debug/lldb.rs b/tests/all/debug/lldb.rs index 3e72eaf49f..dada5deee8 100644 --- a/tests/all/debug/lldb.rs +++ b/tests/all/debug/lldb.rs @@ -133,44 +133,6 @@ check: exited with status Ok(()) } -#[test] -#[ignore] -#[cfg(all( - any(target_os = "linux", target_os = "macos"), - target_pointer_width = "64", - // Ignore test on new backend. The value this is looking for is - // not available at the point that the breakpoint is set when - // compiled by the new backend. - feature = "old-x86-backend", -))] -pub fn test_debug_dwarf_ptr() -> Result<()> { - let output = lldb_with_script( - &[ - "-g", - "--opt-level", - "0", - "tests/all/debug/testsuite/reverse-str.wasm", - ], - r#"b reverse-str.c:9 -r -p __vmctx->set(),&*s -c"#, - )?; - - check_lldb_output( - &output, - r#" -check: Breakpoint 1: no locations (pending) -check: stop reason = breakpoint 1.1 -check: frame #0 -sameln: reverse(s=(__ptr = -check: "Hello, world." -check: resuming -"#, - )?; - Ok(()) -} - #[test] #[ignore] #[cfg(all( diff --git a/tests/all/debug/translate.rs b/tests/all/debug/translate.rs index 2560a71b03..aa1b79343b 100644 --- a/tests/all/debug/translate.rs +++ b/tests/all/debug/translate.rs @@ -109,26 +109,3 @@ check: DW_AT_decl_line (10) ) } -#[test] -#[ignore] -#[cfg(all( - any(target_os = "linux", target_os = "macos"), - target_arch = "x86_64", - target_pointer_width = "64", - // Ignore test on new backend. This is a specific test with hardcoded - // offsets and the new backend compiles the return basic-block at a different - // offset, causing mismatches. - feature = "old-x86-backend", -))] -fn test_debug_dwarf5_translate_lines() -> Result<()> { - check_line_program( - "tests/all/debug/testsuite/fib-wasm-dwarf5.wasm", - r##" -check: Address Line Column File ISA Discriminator Flags -check: 0x000000000000013c 15 3 1 0 0 -# The important point is that the following offset must be _after_ the `ret` instruction. -# FIXME: this +1 increment might vary on other archs. 
-nextln: 0x000000000000013d 15 3 1 0 0 end_sequence - "##, - ) -} diff --git a/tests/all/func.rs b/tests/all/func.rs index da4c630fa4..eef7a9efd6 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -525,7 +525,6 @@ fn pass_cross_store_arg() -> anyhow::Result<()> { } #[test] -#[cfg_attr(feature = "old-x86-backend", ignore)] fn externref_signature_no_reference_types() -> anyhow::Result<()> { let mut config = Config::new(); config.wasm_reference_types(false); @@ -569,7 +568,6 @@ fn trampolines_always_valid() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] fn typed_multiple_results() -> anyhow::Result<()> { let mut store = Store::<()>::default(); let module = Module::new( @@ -647,7 +645,6 @@ fn trap_doesnt_leak() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] fn wrap_multiple_results() -> anyhow::Result<()> { fn test(store: &mut Store<()>, t: T) -> anyhow::Result<()> where diff --git a/tests/all/gc.rs b/tests/all/gc.rs index 73ffe03ece..27c9341ea0 100644 --- a/tests/all/gc.rs +++ b/tests/all/gc.rs @@ -189,7 +189,6 @@ fn many_live_refs() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn drop_externref_via_table_set() -> anyhow::Result<()> { let (mut store, module) = ref_types_module( r#" @@ -285,7 +284,6 @@ fn global_drops_externref() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn table_drops_externref() -> anyhow::Result<()> { test_engine(&Engine::default())?; @@ -336,7 +334,6 @@ fn table_drops_externref() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn gee_i_sure_hope_refcounting_is_atomic() -> anyhow::Result<()> { let mut config = Config::new(); config.wasm_reference_types(true); @@ -426,7 +423,6 @@ fn global_init_no_leak() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] fn no_gc_middle_of_args() -> anyhow::Result<()> { let (mut store, module) = ref_types_module( r#" diff --git a/tests/all/relocs.rs b/tests/all/relocs.rs index 6dab73cd74..fdd0730dfa 100644 --- a/tests/all/relocs.rs +++ b/tests/all/relocs.rs @@ -8,8 +8,6 @@ //! 32-bits, and right now object files aren't supported larger than 4gb anyway //! so we would need a lot of other support necessary to exercise that. -#![cfg(not(feature = "old-x86-backend"))] // multi-value not supported here - use anyhow::Result; use wasmtime::*; diff --git a/tests/all/wast.rs b/tests/all/wast.rs index 675850df36..f700842193 100644 --- a/tests/all/wast.rs +++ b/tests/all/wast.rs @@ -25,12 +25,6 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()> // by reference types. let reftypes = simd || feature_found(wast, "reference-types"); - // Threads & simd aren't implemented in the old backend, so skip those - // tests. 
-    if (threads || simd) && cfg!(feature = "old-x86-backend") {
-        return Ok(());
-    }
-
     let mut cfg = Config::new();
     cfg.wasm_simd(simd)
         .wasm_bulk_memory(bulk_mem)

From 59e18b7d1b130a0e498701a37cdb3bdc33dd02d0 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Fri, 18 Jun 2021 19:25:11 +0200
Subject: [PATCH 02/14] Remove the old riscv backend

---
 cranelift/codegen/Cargo.toml | 2 -
 cranelift/codegen/meta/src/isa/mod.rs | 7 +-
 .../codegen/meta/src/isa/riscv/encodings.rs | 431 ------------------
 cranelift/codegen/meta/src/isa/riscv/mod.rs | 136 ------
 .../codegen/meta/src/isa/riscv/recipes.rs | 280 ------------
 cranelift/codegen/meta/src/lib.rs | 2 +-
 cranelift/codegen/src/binemit/mod.rs | 4 +-
 cranelift/codegen/src/isa/legacy/mod.rs | 4 -
 cranelift/codegen/src/isa/legacy/riscv/abi.rs | 149 ------
 .../codegen/src/isa/legacy/riscv/binemit.rs | 182 --------
 .../src/isa/legacy/riscv/enc_tables.rs | 18 -
 cranelift/codegen/src/isa/legacy/riscv/mod.rs | 304 ------------
 .../codegen/src/isa/legacy/riscv/registers.rs | 50 --
 .../codegen/src/isa/legacy/riscv/settings.rs | 56 ---
 cranelift/codegen/src/isa/mod.rs | 9 -
 cranelift/codegen/src/legalizer/mod.rs | 9 -
 .../filetests/filetests/isa/riscv/abi-e.clif | 14 -
 .../filetests/filetests/isa/riscv/abi.clif | 32 --
 .../filetests/isa/riscv/binary32.clif | 189 --------
 .../filetests/isa/riscv/encoding.clif | 21 -
 .../filetests/isa/riscv/expand-i32.clif | 37 --
 .../filetests/isa/riscv/legalize-abi.clif | 134 ------
 .../filetests/isa/riscv/legalize-i64.clif | 64 ---
 .../isa/riscv/legalize-icmp_imm-i64.clif | 55 ---
 .../filetests/isa/riscv/parse-encoding.clif | 36 --
 .../filetests/isa/riscv/regmove.clif | 15 -
 .../filetests/isa/riscv/split-args.clif | 55 ---
 .../filetests/isa/riscv/verify-encoding.clif | 21 -
 .../parser/instruction_encoding.clif | 24 -
 cranelift/filetests/src/function_runner.rs | 2 +-
 cranelift/src/disasm.rs | 22 -
 cranelift/wasm/Cargo.toml | 3 +-
 cranelift/wasm/tests/wasm_testsuite.rs | 44 +-
 33 files changed, 33 insertions(+), 2378 deletions(-)
 delete mode 100644 cranelift/codegen/meta/src/isa/riscv/encodings.rs
 delete mode 100644 cranelift/codegen/meta/src/isa/riscv/mod.rs
 delete mode 100644 cranelift/codegen/meta/src/isa/riscv/recipes.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/mod.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/abi.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/binemit.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/mod.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/registers.rs
 delete mode 100644 cranelift/codegen/src/isa/legacy/riscv/settings.rs
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/abi-e.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/abi.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/binary32.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/encoding.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/expand-i32.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/legalize-abi.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/legalize-i64.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/parse-encoding.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/regmove.clif
 delete mode 100644 cranelift/filetests/filetests/isa/riscv/split-args.clif
 delete mode 100644
cranelift/filetests/filetests/isa/riscv/verify-encoding.clif delete mode 100644 cranelift/filetests/filetests/parser/instruction_encoding.clif diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 4397568e5a..984d696373 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -63,7 +63,6 @@ unwind = ["gimli"] # If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled. x86 = [] arm64 = [] -riscv = [] s390x = [] arm32 = [] # Work-in-progress codegen backend for ARM. @@ -75,7 +74,6 @@ experimental_x64 = [] all-arch = [ "x86", "arm64", - "riscv", "s390x" ] diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index 34032842c2..9465e63b26 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -5,14 +5,12 @@ use std::fmt; mod arm32; mod arm64; -mod riscv; mod s390x; pub(crate) mod x86; /// Represents known ISA target. #[derive(PartialEq, Copy, Clone)] pub enum Isa { - Riscv, X86, Arm32, Arm64, @@ -31,7 +29,6 @@ impl Isa { /// Creates isa target from arch. pub fn from_arch(arch: &str) -> Option { match arch { - "riscv" => Some(Isa::Riscv), "aarch64" => Some(Isa::Arm64), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), @@ -42,7 +39,7 @@ impl Isa { /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x] + &[Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x] } } @@ -50,7 +47,6 @@ impl fmt::Display for Isa { // These names should be kept in sync with the crate features. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { - Isa::Riscv => write!(f, "riscv"), Isa::X86 => write!(f, "x86"), Isa::Arm32 => write!(f, "arm32"), Isa::Arm64 => write!(f, "arm64"), @@ -62,7 +58,6 @@ impl fmt::Display for Isa { pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec { isas.iter() .map(|isa| match isa { - Isa::Riscv => riscv::define(shared_defs), Isa::X86 => x86::define(shared_defs), Isa::Arm32 => arm32::define(shared_defs), Isa::Arm64 => arm64::define(shared_defs), diff --git a/cranelift/codegen/meta/src/isa/riscv/encodings.rs b/cranelift/codegen/meta/src/isa/riscv/encodings.rs deleted file mode 100644 index c255ddb483..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/encodings.rs +++ /dev/null @@ -1,431 +0,0 @@ -use crate::cdsl::ast::{Apply, Expr, Literal, VarPool}; -use crate::cdsl::encodings::{Encoding, EncodingBuilder}; -use crate::cdsl::instructions::{ - Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry, -}; -use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::SettingGroup; - -use crate::shared::types::Bool::B1; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -use super::recipes::RecipeGroup; - -pub(crate) struct PerCpuModeEncodings<'defs> { - pub inst_pred_reg: InstructionPredicateRegistry, - pub enc32: Vec, - pub enc64: Vec, - recipes: &'defs Recipes, -} - -impl<'defs> PerCpuModeEncodings<'defs> { - fn new(recipes: &'defs Recipes) -> Self { - Self { - inst_pred_reg: InstructionPredicateRegistry::new(), - enc32: Vec::new(), - enc64: Vec::new(), - recipes, - } - } - fn enc( - &self, - inst: impl Into, - recipe: EncodingRecipeNumber, - bits: u16, - ) -> 
EncodingBuilder { - EncodingBuilder::new(inst.into(), recipe, bits) - } - fn add32(&mut self, encoding: EncodingBuilder) { - self.enc32 - .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); - } - fn add64(&mut self, encoding: EncodingBuilder) { - self.enc64 - .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); - } -} - -// The low 7 bits of a RISC-V instruction is the base opcode. All 32-bit instructions have 11 as -// the two low bits, with bits 6:2 determining the base opcode. -// -// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ... -// The functions below encode the encbits. - -fn load_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - funct3 << 5 -} - -fn store_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b01000 | (funct3 << 5) -} - -fn branch_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b11000 | (funct3 << 5) -} - -fn jalr_bits() -> u16 { - // This was previously accepting an argument funct3 of 3 bits and used the following formula: - //0b11001 | (funct3 << 5) - 0b11001 -} - -fn jal_bits() -> u16 { - 0b11011 -} - -fn opimm_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b00100 | (funct3 << 5) | (funct7 << 8) -} - -fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b00110 | (funct3 << 5) | (funct7 << 8) -} - -fn op_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - assert!(funct7 <= 0b111_1111); - 0b01100 | (funct3 << 5) | (funct7 << 8) -} - -fn op32_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - assert!(funct7 <= 0b111_1111); - 0b01110 | (funct3 << 5) | (funct7 << 8) -} - -fn lui_bits() -> u16 { - 0b01101 -} - -pub(crate) fn define<'defs>( - shared_defs: &'defs SharedDefinitions, - isa_settings: &SettingGroup, - recipes: &'defs RecipeGroup, -) -> PerCpuModeEncodings<'defs> { - // Instructions shorthands. - let shared = &shared_defs.instructions; - - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let br_icmp = shared.by_name("br_icmp"); - let brz = shared.by_name("brz"); - let brnz = shared.by_name("brnz"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let iadd = shared.by_name("iadd"); - let iadd_imm = shared.by_name("iadd_imm"); - let iconst = shared.by_name("iconst"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); - let imul = shared.by_name("imul"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let isub = shared.by_name("isub"); - let jump = shared.by_name("jump"); - let regmove = shared.by_name("regmove"); - let spill = shared.by_name("spill"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let return_ = shared.by_name("return"); - - // Recipes shorthands, prefixed with r_. 
- let r_copytossa = recipes.by_name("copytossa"); - let r_fillnull = recipes.by_name("fillnull"); - let r_icall = recipes.by_name("Icall"); - let r_icopy = recipes.by_name("Icopy"); - let r_ii = recipes.by_name("Ii"); - let r_iicmp = recipes.by_name("Iicmp"); - let r_iret = recipes.by_name("Iret"); - let r_irmov = recipes.by_name("Irmov"); - let r_iz = recipes.by_name("Iz"); - let r_gp_sp = recipes.by_name("GPsp"); - let r_gp_fi = recipes.by_name("GPfi"); - let r_r = recipes.by_name("R"); - let r_ricmp = recipes.by_name("Ricmp"); - let r_rshamt = recipes.by_name("Rshamt"); - let r_sb = recipes.by_name("SB"); - let r_sb_zero = recipes.by_name("SBzero"); - let r_stacknull = recipes.by_name("stacknull"); - let r_u = recipes.by_name("U"); - let r_uj = recipes.by_name("UJ"); - let r_uj_call = recipes.by_name("UJcall"); - - // Predicates shorthands. - let use_m = isa_settings.predicate_by_name("use_m"); - - // Definitions. - let mut e = PerCpuModeEncodings::new(&recipes.recipes); - - // Basic arithmetic binary instructions are encoded in an R-type instruction. - for &(inst, inst_imm, f3, f7) in &[ - (iadd, Some(iadd_imm), 0b000, 0b000_0000), - (isub, None, 0b000, 0b010_0000), - (bxor, Some(bxor_imm), 0b100, 0b000_0000), - (bor, Some(bor_imm), 0b110, 0b000_0000), - (band, Some(band_imm), 0b111, 0b000_0000), - ] { - e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7))); - - // Immediate versions for add/xor/or/and. - if let Some(inst_imm) = inst_imm { - e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); - e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); - } - } - - // 32-bit ops in RV64. - e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000))); - e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000))); - // There are no andiw/oriw/xoriw variations. - e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); - - // Use iadd_imm with %x0 to materialize constants. - e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); - e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); - e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); - - // Dynamic shifts have the same masking semantics as the clif base instructions. - for &(inst, inst_imm, f3, f7) in &[ - (ishl, ishl_imm, 0b1, 0b0), - (ushr, ushr_imm, 0b101, 0b0), - (sshr, sshr_imm, 0b101, 0b10_0000), - ] { - e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); - // Allow i32 shift amounts in 64-bit shifts. - e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); - - // Immediate shifts. - e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); - e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); - e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); - } - - // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit - // numbers in RV64. - { - let mut var_pool = VarPool::new(); - - // Helper that creates an instruction predicate for an instruction in the icmp family. 
- let mut icmp_instp = |bound_inst: &BoundInstruction, - intcc_field: &'static str| - -> InstructionPredicateNode { - let x = var_pool.create("x"); - let y = var_pool.create("y"); - let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); - Apply::new( - bound_inst.clone().into(), - vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], - ) - .inst_predicate(&var_pool) - .unwrap() - }; - - let icmp_i32 = icmp.bind(I32); - let icmp_i64 = icmp.bind(I64); - e.add32( - e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i32, "slt")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i64, "slt")), - ); - - e.add32( - e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i32, "ult")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i64, "ult")), - ); - - // Immediate variants. - let icmp_i32 = icmp_imm.bind(I32); - let icmp_i64 = icmp_imm.bind(I64); - e.add32( - e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) - .inst_predicate(icmp_instp(&icmp_i32, "slt")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) - .inst_predicate(icmp_instp(&icmp_i64, "slt")), - ); - - e.add32( - e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) - .inst_predicate(icmp_instp(&icmp_i32, "ult")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) - .inst_predicate(icmp_instp(&icmp_i64, "ult")), - ); - } - - // Integer constants with the low 12 bits clear are materialized by lui. - e.add32(e.enc(iconst.bind(I32), r_u, lui_bits())); - e.add64(e.enc(iconst.bind(I32), r_u, lui_bits())); - e.add64(e.enc(iconst.bind(I64), r_u, lui_bits())); - - // "M" Standard Extension for Integer Multiplication and Division. - // Gated by the `use_m` flag. - e.add32( - e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - e.add64( - e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - e.add64( - e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - - // Control flow. - - // Unconditional branches. - e.add32(e.enc(jump, r_uj, jal_bits())); - e.add64(e.enc(jump, r_uj, jal_bits())); - e.add32(e.enc(call, r_uj_call, jal_bits())); - e.add64(e.enc(call, r_uj_call, jal_bits())); - - // Conditional branches. - { - let mut var_pool = VarPool::new(); - - // Helper that creates an instruction predicate for an instruction in the icmp family. 
- let mut br_icmp_instp = |bound_inst: &BoundInstruction, - intcc_field: &'static str| - -> InstructionPredicateNode { - let x = var_pool.create("x"); - let y = var_pool.create("y"); - let dest = var_pool.create("dest"); - let args = var_pool.create("args"); - let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); - Apply::new( - bound_inst.clone().into(), - vec![ - Expr::Literal(cc), - Expr::Var(x), - Expr::Var(y), - Expr::Var(dest), - Expr::Var(args), - ], - ) - .inst_predicate(&var_pool) - .unwrap() - }; - - let br_icmp_i32 = br_icmp.bind(I32); - let br_icmp_i64 = br_icmp.bind(I64); - for &(cond, f3) in &[ - ("eq", 0b000), - ("ne", 0b001), - ("slt", 0b100), - ("sge", 0b101), - ("ult", 0b110), - ("uge", 0b111), - ] { - e.add32( - e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) - .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), - ); - e.add64( - e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) - .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), - ); - } - } - - for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { - e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); - e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); - e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); - e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); - } - - // Returns are a special case of jalr_bits using %x1 to hold the return address. - // The return address is provided by a special-purpose `link` return value that - // is added by legalize_signature(). - e.add32(e.enc(return_, r_iret, jalr_bits())); - e.add64(e.enc(return_, r_iret, jalr_bits())); - e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits())); - e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits())); - - // Spill and fill. - e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); - e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); - e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); - e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); - e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); - e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); - - // No-op fills, created by late-stage redundant-fill removal. - for &ty in &[I64, I32] { - e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0)); - e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0)); - } - e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0)); - e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0)); - - // Register copies. - e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); - - e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); - - e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); - e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); - - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - // into a no-op. - // The same encoding is generated for both the 64- and 32-bit architectures. 
- for &ty in &[I64, I32, I16, I8] { - e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - } - for &ty in &[F64, F32] { - e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - } - - // Copy-to-SSA - e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0))); - e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); - e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0))); - - e -} diff --git a/cranelift/codegen/meta/src/isa/riscv/mod.rs b/cranelift/codegen/meta/src/isa/riscv/mod.rs deleted file mode 100644 index 868ac17cfe..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/mod.rs +++ /dev/null @@ -1,136 +0,0 @@ -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; -use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; - -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I32, I64}; -use crate::shared::Definitions as SharedDefinitions; - -mod encodings; -mod recipes; - -fn define_settings(shared: &SettingGroup) -> SettingGroup { - let mut setting = SettingGroupBuilder::new("riscv"); - - let supports_m = setting.add_bool( - "supports_m", - "CPU supports the 'M' extension (mul/div)", - "", - false, - ); - let supports_a = setting.add_bool( - "supports_a", - "CPU supports the 'A' extension (atomics)", - "", - false, - ); - let supports_f = setting.add_bool( - "supports_f", - "CPU supports the 'F' extension (float)", - "", - false, - ); - let supports_d = setting.add_bool( - "supports_d", - "CPU supports the 'D' extension (double)", - "", - false, - ); - - let enable_m = setting.add_bool( - "enable_m", - "Enable the use of 'M' instructions if available", - "", - true, - ); - - setting.add_bool( - "enable_e", - "Enable the 'RV32E' instruction set with only 16 registers", - "", - false, - ); - - let shared_enable_atomics = shared.get_bool("enable_atomics"); - let shared_enable_float = shared.get_bool("enable_float"); - let shared_enable_simd = shared.get_bool("enable_simd"); - - setting.add_predicate("use_m", predicate!(supports_m && enable_m)); - setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics)); - setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float)); - setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float)); - setting.add_predicate( - "full_float", - predicate!(shared_enable_simd && supports_f && supports_d), - ); - - setting.build() -} - -fn define_registers() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("IntRegs", "x") - .units(32) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FloatRegs", "f") - .units(32) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - regs.add_class(builder); - - regs.build() -} - -pub(crate) fn 
define(shared_defs: &mut SharedDefinitions) -> TargetIsa { - let settings = define_settings(&shared_defs.settings); - let regs = define_registers(); - - // CPU modes for 32-bit and 64-bit operation. - let mut rv_32 = CpuMode::new("RV32"); - let mut rv_64 = CpuMode::new("RV64"); - - let expand = shared_defs.transform_groups.by_name("expand"); - let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags"); - - rv_32.legalize_monomorphic(expand); - rv_32.legalize_default(narrow_no_flags); - rv_32.legalize_type(I32, expand); - rv_32.legalize_type(F32, expand); - rv_32.legalize_type(F64, expand); - - rv_64.legalize_monomorphic(expand); - rv_64.legalize_default(narrow_no_flags); - rv_64.legalize_type(I32, expand); - rv_64.legalize_type(I64, expand); - rv_64.legalize_type(F32, expand); - rv_64.legalize_type(F64, expand); - - let recipes = recipes::define(shared_defs, ®s); - - let encodings = encodings::define(shared_defs, &settings, &recipes); - rv_32.set_encodings(encodings.enc32); - rv_64.set_encodings(encodings.enc64); - let encodings_predicates = encodings.inst_pred_reg.extract(); - - let recipes = recipes.collect(); - - let cpu_modes = vec![rv_32, rv_64]; - - TargetIsa::new( - "riscv", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) -} diff --git a/cranelift/codegen/meta/src/isa/riscv/recipes.rs b/cranelift/codegen/meta/src/isa/riscv/recipes.rs deleted file mode 100644 index dc879dcecb..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/recipes.rs +++ /dev/null @@ -1,280 +0,0 @@ -use std::collections::HashMap; - -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack}; -use crate::cdsl::regs::IsaRegs; -use crate::shared::Definitions as SharedDefinitions; - -/// An helper to create recipes and use them when defining the RISCV encodings. -pub(crate) struct RecipeGroup { - /// The actualy list of recipes explicitly created in this file. - pub recipes: Recipes, - - /// Provides fast lookup from a name to an encoding recipe. - name_to_recipe: HashMap, -} - -impl RecipeGroup { - fn new() -> Self { - Self { - recipes: Recipes::new(), - name_to_recipe: HashMap::new(), - } - } - - fn push(&mut self, builder: EncodingRecipeBuilder) { - assert!( - self.name_to_recipe.get(&builder.name).is_none(), - "riscv recipe '{}' created twice", - builder.name - ); - let name = builder.name.clone(); - let number = self.recipes.push(builder.build()); - self.name_to_recipe.insert(name, number); - } - - pub fn by_name(&self, name: &str) -> EncodingRecipeNumber { - *self - .name_to_recipe - .get(name) - .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name)) - } - - pub fn collect(self) -> Recipes { - self.recipes - } -} - -pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup { - let formats = &shared_defs.formats; - - // Register classes shorthands. - let gpr = regs.class_by_name("GPR"); - - // Definitions. - let mut recipes = RecipeGroup::new(); - - // R-type 32-bit instructions: These are mostly binary arithmetic instructions. - // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) - recipes.push( - EncodingRecipeBuilder::new("R", &formats.binary, 4) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), - ); - - // R-type with an immediate shift amount instead of rs2. 
- recipes.push( - EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // R-type encoding of an integer comparison. - recipes.push( - EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), - ); - - recipes.push( - EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 12, - 0, - )) - .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // I-type instruction with a hardcoded %x0 rs1. - recipes.push( - EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.unary_imm, - "imm", - 12, - 0, - )) - .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), - ); - - // I-type encoding of an integer comparison. - recipes.push( - EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.int_compare_imm, - "imm", - 12, - 0, - )) - .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The - // variable return values are not encoded. - recipes.push( - EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit( - r#" - // Return instructions are always a jalr to %x1. - // The return address is provided as a special-purpose link argument. - put_i( - bits, - 1, // rs1 = %x1 - 0, // no offset. - 0, // rd = %x0: no address written. - sink, - ); - "#, - ), - ); - - // I-type encoding for `jalr` as a call_indirect. - recipes.push( - EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4) - .operands_in(vec![gpr]) - .emit( - r#" - // call_indirect instructions are jalr with rd=%x1. - put_i( - bits, - in_reg0, - 0, // no offset. - 1, // rd = %x1: link register. - sink, - ); - "#, - ), - ); - - // Copy of a GPR is implemented as addi x, 0. - recipes.push( - EncodingRecipeBuilder::new("Icopy", &formats.unary, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), - ); - - // Same for a GPR regmove. - recipes.push( - EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4) - .operands_in(vec![gpr]) - .emit("put_i(bits, src, 0, dst, sink);"), - ); - - // Same for copy-to-SSA -- GPR regmove. - recipes.push( - EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4) - // No operands_in to mention, because a source register is specified directly. - .operands_out(vec![gpr]) - .emit("put_i(bits, src, 0, out_reg0, sink);"), - ); - - // U-type instructions have a 20-bit immediate that targets bits 12-31. - recipes.push( - EncodingRecipeBuilder::new("U", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.unary_imm, - "imm", - 32, - 12, - )) - .emit("put_u(bits, imm.into(), out_reg0, sink);"), - ); - - // UJ-type unconditional branch instructions. 
- recipes.push( - EncodingRecipeBuilder::new("UJ", &formats.jump, 4) - .branch_range((0, 21)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_uj(bits, disp, 0, sink); - "#, - ), - ); - - recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit( - r#" - sink.reloc_external(func.srclocs[inst], - Reloc::RiscvCall, - &func.dfg.ext_funcs[func_ref].name, - 0); - // rd=%x1 is the standard link register. - put_uj(bits, 0, 1, sink); - "#, - )); - - // SB-type branch instructions. - recipes.push( - EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4) - .operands_in(vec![gpr, gpr]) - .branch_range((0, 13)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, in_reg1, sink); - "#, - ), - ); - - // SB-type branch instruction with rs2 fixed to zero. - recipes.push( - EncodingRecipeBuilder::new("SBzero", &formats.branch, 4) - .operands_in(vec![gpr]) - .branch_range((0, 13)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, 0, sink); - "#, - ), - ); - - // Spill of a GPR. - recipes.push( - EncodingRecipeBuilder::new("GPsp", &formats.unary, 4) - .operands_in(vec![gpr]) - .operands_out(vec![Stack::new(gpr)]) - .emit("unimplemented!();"), - ); - - // Fill of a GPR. - recipes.push( - EncodingRecipeBuilder::new("GPfi", &formats.unary, 4) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![gpr]) - .emit("unimplemented!();"), - ); - - // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. - recipes.push( - EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![Stack::new(gpr)]) - .emit(""), - ); - - // No-op fills, created by late-stage redundant-fill removal. - recipes.push( - EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit(""), - ); - - recipes -} diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 29a545aad6..88f7ed6b09 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -119,7 +119,7 @@ pub fn generate( isa::Isa::S390x => { // s390x doesn't have platform-specific settings. } - isa::Isa::Arm32 | isa::Isa::Riscv => todo!(), + isa::Isa::Arm32 => todo!(), } } diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index 62602d5a88..8861d92ce2 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -58,8 +58,6 @@ pub enum Reloc { /// value is sign-extended, multiplied by 4, and added to the PC of /// the call instruction to form the destination address. 
Arm64Call, - /// RISC-V call target - RiscvCall, /// s390x PC-relative 4-byte offset S390xPCRel32Dbl, @@ -93,7 +91,7 @@ impl fmt::Display for Reloc { Self::X86CallPCRel4 => write!(f, "CallPCRel4"), Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), - Self::Arm32Call | Self::Arm64Call | Self::RiscvCall => write!(f, "Call"), + Self::Arm32Call | Self::Arm64Call => write!(f, "Call"), Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), diff --git a/cranelift/codegen/src/isa/legacy/mod.rs b/cranelift/codegen/src/isa/legacy/mod.rs deleted file mode 100644 index 15900b9509..0000000000 --- a/cranelift/codegen/src/isa/legacy/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Legacy ("old-style") backends that will be removed in the future. - -#[cfg(feature = "riscv")] -pub(crate) mod riscv; diff --git a/cranelift/codegen/src/isa/legacy/riscv/abi.rs b/cranelift/codegen/src/isa/legacy/riscv/abi.rs deleted file mode 100644 index 44c5f36afe..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/abi.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! RISC-V ABI implementation. -//! -//! This module implements the RISC-V calling convention through the primary `legalize_signature()` -//! entry point. -//! -//! This doesn't support the soft-float ABI at the moment. - -use super::registers::{FPR, GPR}; -use super::settings; -use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; -use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type}; -use crate::isa::RegClass; -use crate::regalloc::RegisterSet; -use alloc::borrow::Cow; -use core::i32; -use target_lexicon::Triple; - -struct Args { - pointer_bits: u8, - pointer_bytes: u8, - pointer_type: Type, - regs: u32, - reg_limit: u32, - offset: u32, -} - -impl Args { - fn new(bits: u8, enable_e: bool) -> Self { - Self { - pointer_bits: bits, - pointer_bytes: bits / 8, - pointer_type: Type::int(u16::from(bits)).unwrap(), - regs: 0, - reg_limit: if enable_e { 6 } else { 8 }, - offset: 0, - } - } -} - -impl ArgAssigner for Args { - fn assign(&mut self, arg: &AbiParam) -> ArgAction { - fn align(value: u32, to: u32) -> u32 { - (value + to - 1) & !(to - 1) - } - - let ty = arg.value_type; - - // Check for a legal type. - // RISC-V doesn't have SIMD at all, so break all vectors down. - if ty.is_vector() { - return ValueConversion::VectorSplit.into(); - } - - // Large integers and booleans are broken down to fit in a register. - if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) { - // Align registers and stack to a multiple of two pointers. - self.regs = align(self.regs, 2); - self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes)); - return ValueConversion::IntSplit.into(); - } - - // Small integers are extended to the size of a pointer register. - if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) { - match arg.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), - ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), - } - } - - if self.regs < self.reg_limit { - // Assign to a register. - let reg = if ty.is_float() { - FPR.unit(10 + self.regs as usize) - } else { - GPR.unit(10 + self.regs as usize) - }; - self.regs += 1; - ArgumentLoc::Reg(reg).into() - } else { - // Assign a stack location. 
- let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += u32::from(self.pointer_bytes); - debug_assert!(self.offset <= i32::MAX as u32); - loc.into() - } - } -} - -/// Legalize `sig` for RISC-V. -pub fn legalize_signature( - sig: &mut Cow, - triple: &Triple, - isa_flags: &settings::Flags, - current: bool, -) { - let bits = triple.pointer_width().unwrap().bits(); - - let mut args = Args::new(bits, isa_flags.enable_e()); - if let Some(new_params) = legalize_args(&sig.params, &mut args) { - sig.to_mut().params = new_params; - } - - let mut rets = Args::new(bits, isa_flags.enable_e()); - if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - sig.to_mut().returns = new_returns; - } - - if current { - let ptr = Type::int(u16::from(bits)).unwrap(); - - // Add the link register as an argument and return value. - // - // The `jalr` instruction implementing a return can technically accept the return address - // in any register, but a micro-architecture with a return address predictor will only - // recognize it as a return if the address is in `x1`. - let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1)); - sig.to_mut().params.push(link); - sig.to_mut().returns.push(link); - } -} - -/// Get register class for a type appearing in a legalized signature. -pub fn regclass_for_abi_type(ty: Type) -> RegClass { - if ty.is_float() { - FPR - } else { - GPR - } -} - -pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet { - let mut regs = RegisterSet::new(); - regs.take(GPR, GPR.unit(0)); // Hard-wired 0. - // %x1 is the link register which is available for allocation. - regs.take(GPR, GPR.unit(2)); // Stack pointer. - regs.take(GPR, GPR.unit(3)); // Global pointer. - regs.take(GPR, GPR.unit(4)); // Thread pointer. - // TODO: %x8 is the frame pointer. Reserve it? - - // Remove %x16 and up for RV32E. - if isa_flags.enable_e() { - for u in 16..32 { - regs.take(GPR, GPR.unit(u)); - } - } - - regs -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/binemit.rs b/cranelift/codegen/src/isa/legacy/riscv/binemit.rs deleted file mode 100644 index a1d2b82e12..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/binemit.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Emitting binary RISC-V machine code. - -use crate::binemit::{bad_encoding, CodeSink, Reloc}; -use crate::ir::{Function, Inst, InstructionData}; -use crate::isa::{RegUnit, StackBaseMask, StackRef, TargetIsa}; -use crate::predicates::is_signed_int; -use crate::regalloc::RegDiversions; -use core::u32; - -include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs")); - -/// R-type instructions. -/// -/// 31 24 19 14 11 6 -/// funct7 rs2 rs1 funct3 rd opcode -/// 25 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`. -fn put_r(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let funct7 = (bits >> 8) & 0x7f; - let rs1 = u32::from(rs1) & 0x1f; - let rs2 = u32::from(rs2) & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= rs2 << 20; - i |= funct7 << 25; - - sink.put4(i); -} - -/// R-type instructions with a shift amount instead of rs2. -/// -/// 31 25 19 14 11 6 -/// funct7 shamt rs1 funct3 rd opcode -/// 25 20 15 12 7 0 -/// -/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31. 
-/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`. -fn put_rshamt( - bits: u16, - rs1: RegUnit, - shamt: i64, - rd: RegUnit, - sink: &mut CS, -) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let funct7 = (bits >> 8) & 0x7f; - let rs1 = u32::from(rs1) & 0x1f; - let shamt = shamt as u32 & 0x3f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= shamt << 20; - i |= funct7 << 25; - - sink.put4(i); -} - -/// I-type instructions. -/// -/// 31 19 14 11 6 -/// imm rs1 funct3 rd opcode -/// 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_i(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let rs1 = u32::from(rs1) & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= (imm << 20) as u32; - - sink.put4(i); -} - -/// U-type instructions. -/// -/// 31 11 6 -/// imm rd opcode -/// 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_u(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= imm as u32 & 0xfffff000; - - sink.put4(i); -} - -/// SB-type branch instructions. -/// -/// 31 24 19 14 11 6 -/// imm rs2 rs1 funct3 imm opcode -/// 25 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_sb(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let rs1 = u32::from(rs1) & 0x1f; - let rs2 = u32::from(rs2) & 0x1f; - - debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm); - let imm = imm as u32; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= funct3 << 12; - i |= rs1 << 15; - i |= rs2 << 20; - - // The displacement is completely hashed up. - i |= ((imm >> 11) & 0x1) << 7; - i |= ((imm >> 1) & 0xf) << 8; - i |= ((imm >> 5) & 0x3f) << 25; - i |= ((imm >> 12) & 0x1) << 31; - - sink.put4(i); -} - -/// UJ-type jump instructions. -/// -/// 31 11 6 -/// imm rd opcode -/// 12 7 0 -/// -/// Encoding bits: `opcode[6:2]` -fn put_uj(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let rd = u32::from(rd) & 0x1f; - - debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm); - let imm = imm as u32; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - - // The displacement is completely hashed up. - i |= imm & 0xff000; - i |= ((imm >> 11) & 0x1) << 20; - i |= ((imm >> 1) & 0x3ff) << 21; - i |= ((imm >> 20) & 0x1) << 31; - - sink.put4(i); -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs b/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs deleted file mode 100644 index 76184ad727..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Encoding tables for RISC-V. 
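The put_r and put_i helpers above take the recipe bits as opcode[6:2] | (funct3 << 5) | (funct7 << 8) and then scatter registers and immediates into the 32-bit word. As a rough sanity check of that layout, here is a small standalone sketch; encode_i and encode_r are hypothetical names (not the deleted helpers), they take the full 7-bit opcode directly, and the two expected constants are the ; bin: values that the binary32.clif test removed later in this patch gives for addi x7, x10, 1000 and add x7, x10, x21.

fn encode_i(opcode: u32, funct3: u32, rd: u32, rs1: u32, imm: i32) -> u32 {
    // imm[31:20] | rs1[19:15] | funct3[14:12] | rd[11:7] | opcode[6:0]
    opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | ((imm as u32) << 20)
}

fn encode_r(opcode: u32, funct3: u32, funct7: u32, rd: u32, rs1: u32, rs2: u32) -> u32 {
    // funct7[31:25] | rs2[24:20] | rs1[19:15] | funct3[14:12] | rd[11:7] | opcode[6:0]
    opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (funct7 << 25)
}

fn main() {
    // addi x7, x10, 1000 (ADDI opcode 0x13, i.e. opcode[6:2] = 0b00100 in the recipe bits)
    assert_eq!(encode_i(0x13, 0b000, 7, 10, 1000), 0x3e85_0393);
    // add x7, x10, x21 (ADD opcode 0x33, i.e. opcode[6:2] = 0b01100, funct3 = funct7 = 0)
    assert_eq!(encode_r(0x33, 0b000, 0b000_0000, 7, 10, 21), 0x0155_03b3);
}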
- -use super::registers::*; -use crate::ir; -use crate::isa; -use crate::isa::constraints::*; -use crate::isa::enc_tables::*; -use crate::isa::encoding::{base_size, RecipeSizing}; -use crate::predicates; - -// Include the generated encoding tables: -// - `LEVEL1_RV32` -// - `LEVEL1_RV64` -// - `LEVEL2` -// - `ENCLIST` -// - `INFO` -include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs")); -include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs")); diff --git a/cranelift/codegen/src/isa/legacy/riscv/mod.rs b/cranelift/codegen/src/isa/legacy/riscv/mod.rs deleted file mode 100644 index 2c1ebf1c85..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/mod.rs +++ /dev/null @@ -1,304 +0,0 @@ -//! RISC-V Instruction Set Architecture. - -mod abi; -mod binemit; -mod enc_tables; -mod registers; -pub mod settings; - -use super::super::settings as shared_settings; -#[cfg(feature = "testing_hooks")] -use crate::binemit::CodeSink; -use crate::binemit::{emit_function, MemoryCodeSink}; -use crate::ir; -use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; -use crate::isa::Builder as IsaBuilder; -use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; -use crate::regalloc; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; -use core::fmt; -use core::hash::{Hash, Hasher}; -use target_lexicon::{PointerWidth, Triple}; - -#[allow(dead_code)] -struct Isa { - triple: Triple, - shared_flags: shared_settings::Flags, - isa_flags: settings::Flags, - cpumode: &'static [shared_enc_tables::Level1Entry], -} - -/// Get an ISA builder for creating RISC-V targets. -pub fn isa_builder(triple: Triple) -> IsaBuilder { - IsaBuilder { - triple, - setup: settings::builder(), - constructor: isa_constructor, - } -} - -fn isa_constructor( - triple: Triple, - shared_flags: shared_settings::Flags, - builder: shared_settings::Builder, -) -> Box { - let level1 = match triple.pointer_width().unwrap() { - PointerWidth::U16 => panic!("16-bit RISC-V unrecognized"), - PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..], - PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..], - }; - Box::new(Isa { - triple, - isa_flags: settings::Flags::new(&shared_flags, builder), - shared_flags, - cpumode: level1, - }) -} - -impl TargetIsa for Isa { - fn name(&self) -> &'static str { - "riscv" - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.shared_flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.shared_flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - - fn register_info(&self) -> RegInfo { - registers::INFO.clone() - } - - fn encoding_info(&self) -> EncInfo { - enc_tables::INFO.clone() - } - - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - lookup_enclist( - ctrl_typevar, - inst, - func, - self.cpumode, - &enc_tables::LEVEL2[..], - &enc_tables::ENCLISTS[..], - &enc_tables::LEGALIZE_ACTIONS[..], - &enc_tables::RECIPE_PREDICATES[..], - &enc_tables::INST_PREDICATES[..], - self.isa_flags.predicate_view(), - ) - } - - fn legalize_signature(&self, sig: &mut Cow, current: bool) { - abi::legalize_signature(sig, &self.triple, &self.isa_flags, current) - } - - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { - abi::regclass_for_abi_type(ty) - } - - fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet { - 
abi::allocatable_registers(func, &self.isa_flags) - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn CodeSink, - ) { - binemit::emit_inst(func, inst, divert, sink, self) - } - - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink, self) - } - - fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { - unimplemented!() - } - - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - unimplemented!() - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } -} - -#[cfg(test)] -mod tests { - use crate::ir::{immediates, types}; - use crate::ir::{Function, InstructionData, Opcode}; - use crate::isa; - use crate::settings::{self, Configurable}; - use alloc::string::{String, ToString}; - use core::str::FromStr; - use target_lexicon::triple; - - fn encstr(isa: &dyn isa::TargetIsa, enc: Result) -> String { - match enc { - Ok(e) => isa.encoding_info().display(e).to_string(), - Err(_) => "no encoding".to_string(), - } - } - - #[test] - fn test_64bitenc() { - let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - let isa = isa::lookup(triple!("riscv64")) - .unwrap() - .finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg64 = func.dfg.append_block_param(block, types::I64); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Try to encode iadd_imm.i64 v1, -10. - let inst64 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10), - }; - - // ADDI is I/0b00100 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst64, types::I64)), - "Ii#04" - ); - - // Try to encode iadd_imm.i64 v1, -10000. - let inst64_large = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10000), - }; - - // Immediate is out of range for ADDI. - assert!(isa.encode(&func, &inst64_large, types::I64).is_err()); - - // Create an iadd_imm.i32 which is encodable in RV64. - let inst32 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg32, - imm: immediates::Imm64::new(10), - }; - - // ADDIW is I/0b00110 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst32, types::I32)), - "Ii#06" - ); - } - - // Same as above, but for RV32. - #[test] - fn test_32bitenc() { - let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - let isa = isa::lookup(triple!("riscv32")) - .unwrap() - .finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg64 = func.dfg.append_block_param(block, types::I64); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Try to encode iadd_imm.i64 v1, -10. - let inst64 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10), - }; - - // In 32-bit mode, an i64 bit add should be narrowed. - assert!(isa.encode(&func, &inst64, types::I64).is_err()); - - // Try to encode iadd_imm.i64 v1, -10000. - let inst64_large = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10000), - }; - - // In 32-bit mode, an i64 bit add should be narrowed. - assert!(isa.encode(&func, &inst64_large, types::I64).is_err()); - - // Create an iadd_imm.i32 which is encodable in RV32. 
- let inst32 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg32, - imm: immediates::Imm64::new(10), - }; - - // ADDI is I/0b00100 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst32, types::I32)), - "Ii#04" - ); - - // Create an imul.i32 which is encodable in RV32, but only when use_m is true. - let mul32 = InstructionData::Binary { - opcode: Opcode::Imul, - args: [arg32, arg32], - }; - - assert!(isa.encode(&func, &mul32, types::I32).is_err()); - } - - #[test] - fn test_rv32m() { - let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - - // Set the supports_m stting which in turn enables the use_m predicate that unlocks - // encodings for imul. - let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap(); - isa_builder.enable("supports_m").unwrap(); - - let isa = isa_builder.finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Create an imul.i32 which is encodable in RV32M. - let mul32 = InstructionData::Binary { - opcode: Opcode::Imul, - args: [arg32, arg32], - }; - assert_eq!( - encstr(&*isa, isa.encode(&func, &mul32, types::I32)), - "R#10c" - ); - } -} - -impl fmt::Display for Isa { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}", self.shared_flags, self.isa_flags) - } -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/registers.rs b/cranelift/codegen/src/isa/legacy/riscv/registers.rs deleted file mode 100644 index 9043b7f65f..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/registers.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! RISC-V register descriptions. - -use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; - -include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs")); - -#[cfg(test)] -mod tests { - use super::{FPR, GPR, INFO}; - use crate::isa::RegUnit; - use alloc::string::{String, ToString}; - - #[test] - fn unit_encodings() { - assert_eq!(INFO.parse_regunit("x0"), Some(0)); - assert_eq!(INFO.parse_regunit("x31"), Some(31)); - assert_eq!(INFO.parse_regunit("f0"), Some(32)); - assert_eq!(INFO.parse_regunit("f31"), Some(63)); - - assert_eq!(INFO.parse_regunit("x32"), None); - assert_eq!(INFO.parse_regunit("f32"), None); - } - - #[test] - fn unit_names() { - fn uname(ru: RegUnit) -> String { - INFO.display_regunit(ru).to_string() - } - - assert_eq!(uname(0), "%x0"); - assert_eq!(uname(1), "%x1"); - assert_eq!(uname(31), "%x31"); - assert_eq!(uname(32), "%f0"); - assert_eq!(uname(33), "%f1"); - assert_eq!(uname(63), "%f31"); - assert_eq!(uname(64), "%INVALID64"); - } - - #[test] - fn classes() { - assert!(GPR.contains(GPR.unit(0))); - assert!(GPR.contains(GPR.unit(31))); - assert!(!FPR.contains(GPR.unit(0))); - assert!(!FPR.contains(GPR.unit(31))); - assert!(!GPR.contains(FPR.unit(0))); - assert!(!GPR.contains(FPR.unit(31))); - assert!(FPR.contains(FPR.unit(0))); - assert!(FPR.contains(FPR.unit(31))); - } -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/settings.rs b/cranelift/codegen/src/isa/legacy/riscv/settings.rs deleted file mode 100644 index 3da9f491fd..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/settings.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! RISC-V Settings. - -use crate::settings::{self, detail, Builder, Value}; -use core::fmt; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. 
This file contains a -// public `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta/src/isa/riscv/mod.rs`. -include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs")); - -#[cfg(test)] -mod tests { - use super::{builder, Flags}; - use crate::settings::{self, Configurable}; - use alloc::string::ToString; - - #[test] - fn display_default() { - let shared = settings::Flags::new(settings::builder()); - let b = builder(); - let f = Flags::new(&shared, b); - assert_eq!( - f.to_string(), - "[riscv]\n\ - supports_m = false\n\ - supports_a = false\n\ - supports_f = false\n\ - supports_d = false\n\ - enable_m = true\n\ - enable_e = false\n" - ); - // Predicates are not part of the Display output. - assert_eq!(f.full_float(), false); - } - - #[test] - fn predicates() { - let mut sb = settings::builder(); - sb.set("enable_simd", "true").unwrap(); - let shared = settings::Flags::new(sb); - let mut b = builder(); - b.enable("supports_f").unwrap(); - b.enable("supports_d").unwrap(); - let f = Flags::new(&shared, b); - assert_eq!(f.full_float(), true); - - let mut sb = settings::builder(); - sb.set("enable_simd", "false").unwrap(); - let shared = settings::Flags::new(sb); - let mut b = builder(); - b.enable("supports_f").unwrap(); - b.enable("supports_d").unwrap(); - let f = Flags::new(&shared, b); - assert_eq!(f.full_float(), false); - } -} diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 92edfd744f..069324f040 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -84,12 +84,6 @@ pub(crate) mod aarch64; #[cfg(feature = "s390x")] mod s390x; -#[cfg(feature = "riscv")] -mod legacy; - -#[cfg(feature = "riscv")] -use legacy::riscv; - pub mod unwind; mod call_conv; @@ -121,9 +115,6 @@ macro_rules! isa_builder { /// by `variant` if available. pub fn lookup_variant(triple: Triple) -> Result { match triple.architecture { - Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => { - isa_builder!(riscv, (feature = "riscv"), triple) - } Architecture::X86_64 => { isa_builder!(x64, (feature = "x86"), triple) } diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index ee89ee35f0..0ba70915a1 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -13,19 +13,12 @@ //! The legalizer does not deal with register allocation constraints. These constraints are derived //! from the encoding recipes, and solved later by the register allocator. -#[cfg(any(feature = "x86", feature = "riscv"))] -use crate::bitset::BitSet; use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; use crate::ir::types::{I32, I64}; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; -#[cfg(feature = "riscv")] -use crate::predicates; -#[cfg(feature = "riscv")] -use alloc::vec::Vec; - use crate::timing; use alloc::collections::BTreeSet; @@ -37,8 +30,6 @@ mod libcall; mod split; mod table; -#[cfg(any(feature = "x86", feature = "riscv"))] -use self::call::expand_call; use self::globalvalue::expand_global_value; use self::heap::expand_heap_addr; pub(crate) use self::libcall::expand_as_libcall; diff --git a/cranelift/filetests/filetests/isa/riscv/abi-e.clif b/cranelift/filetests/filetests/isa/riscv/abi-e.clif deleted file mode 100644 index fcd762ee81..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/abi-e.clif +++ /dev/null @@ -1,14 +0,0 @@ -; Test the legalization of function signatures for RV32E. 
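The RV32E check below exercises the reg_limit = if enable_e { 6 } else { 8 } branch of the deleted Args assigner: the eight i32 pieces of four i64 arguments occupy x10 through x15 and then spill to stack offsets 0 and 4. A rough model of just that part is sketched here; the Loc type and assign_int_args name are made up, and float registers and the even/odd register-pair alignment are ignored.

#[derive(Debug, PartialEq)]
enum Loc {
    Reg(&'static str),
    Stack(u32),
}

fn assign_int_args(count: usize, enable_e: bool) -> Vec<Loc> {
    const GPR_NAMES: [&str; 8] = ["x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"];
    let reg_limit = if enable_e { 6 } else { 8 };
    let mut offset = 0u32;
    (0..count)
        .map(|i| {
            if i < reg_limit {
                Loc::Reg(GPR_NAMES[i])
            } else {
                let loc = Loc::Stack(offset);
                offset += 4; // RV32 pointer width
                loc
            }
        })
        .collect()
}

fn main() {
    // Eight i32 pieces under RV32E: x10..x15, then stack [0] and [4], as checked below.
    let locs = assign_int_args(8, true);
    assert_eq!(locs[5], Loc::Reg("x15"));
    assert_eq!(locs[6], Loc::Stack(0));
    assert_eq!(locs[7], Loc::Stack(4));
}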
-test legalizer -target riscv32 enable_e - -; regex: V=v\d+ - -function %f() { - ; Spilling into the stack args after %x15 since %16 and up are not - ; available in RV32E. - sig0 = (i64, i64, i64, i64) -> i64 system_v - ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] system_v -block0: - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/abi.clif b/cranelift/filetests/filetests/isa/riscv/abi.clif deleted file mode 100644 index d9469f490e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/abi.clif +++ /dev/null @@ -1,32 +0,0 @@ -; Test the legalization of function signatures. -test legalizer -target riscv32 - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - - ; The i64 argument must go in an even-odd register pair. - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - - ; Spilling into the stack args. - sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 system_v - ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] system_v - - ; Splitting vectors. - sig4 = (i32x4) system_v - ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) system_v - - ; Splitting vectors, then splitting ints. - sig5 = (i64x4) system_v - ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) system_v - -block0: - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/binary32.clif b/cranelift/filetests/filetests/isa/riscv/binary32.clif deleted file mode 100644 index 5a69c4289b..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/binary32.clif +++ /dev/null @@ -1,189 +0,0 @@ -; Binary emission of 32-bit code. -test binemit -target riscv32 - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - sig0 = () - fn0 = %foo() - -block0(v9999: i32): - [-,%x10] v1 = iconst.i32 1 - [-,%x21] v2 = iconst.i32 2 - - ; Integer Register-Register Operations. 
- ; add - [-,%x7] v10 = iadd v1, v2 ; bin: 015503b3 - [-,%x16] v11 = iadd v2, v1 ; bin: 00aa8833 - ; sub - [-,%x7] v12 = isub v1, v2 ; bin: 415503b3 - [-,%x16] v13 = isub v2, v1 ; bin: 40aa8833 - ; and - [-,%x7] v20 = band v1, v2 ; bin: 015573b3 - [-,%x16] v21 = band v2, v1 ; bin: 00aaf833 - ; or - [-,%x7] v22 = bor v1, v2 ; bin: 015563b3 - [-,%x16] v23 = bor v2, v1 ; bin: 00aae833 - ; xor - [-,%x7] v24 = bxor v1, v2 ; bin: 015543b3 - [-,%x16] v25 = bxor v2, v1 ; bin: 00aac833 - ; sll - [-,%x7] v30 = ishl v1, v2 ; bin: 015513b3 - [-,%x16] v31 = ishl v2, v1 ; bin: 00aa9833 - ; srl - [-,%x7] v32 = ushr v1, v2 ; bin: 015553b3 - [-,%x16] v33 = ushr v2, v1 ; bin: 00aad833 - ; sra - [-,%x7] v34 = sshr v1, v2 ; bin: 415553b3 - [-,%x16] v35 = sshr v2, v1 ; bin: 40aad833 - ; slt - [-,%x7] v42 = icmp slt v1, v2 ; bin: 015523b3 - [-,%x16] v43 = icmp slt v2, v1 ; bin: 00aaa833 - ; sltu - [-,%x7] v44 = icmp ult v1, v2 ; bin: 015533b3 - [-,%x16] v45 = icmp ult v2, v1 ; bin: 00aab833 - - ; Integer Register-Immediate Instructions - - ; addi - [-,%x7] v100 = iadd_imm v1, 1000 ; bin: 3e850393 - [-,%x16] v101 = iadd_imm v2, -905 ; bin: c77a8813 - ; andi - [-,%x7] v110 = band_imm v1, 1000 ; bin: 3e857393 - [-,%x16] v111 = band_imm v2, -905 ; bin: c77af813 - ; ori - [-,%x7] v112 = bor_imm v1, 1000 ; bin: 3e856393 - [-,%x16] v113 = bor_imm v2, -905 ; bin: c77ae813 - ; xori - [-,%x7] v114 = bxor_imm v1, 1000 ; bin: 3e854393 - [-,%x16] v115 = bxor_imm v2, -905 ; bin: c77ac813 - - ; slli - [-,%x7] v120 = ishl_imm v1, 31 ; bin: 01f51393 - [-,%x16] v121 = ishl_imm v2, 8 ; bin: 008a9813 - ; srli - [-,%x7] v122 = ushr_imm v1, 31 ; bin: 01f55393 - [-,%x16] v123 = ushr_imm v2, 8 ; bin: 008ad813 - ; srai - [-,%x7] v124 = sshr_imm v1, 31 ; bin: 41f55393 - [-,%x16] v125 = sshr_imm v2, 8 ; bin: 408ad813 - - ; slti - [-,%x7] v130 = icmp_imm slt v1, 1000 ; bin: 3e852393 - [-,%x16] v131 = icmp_imm slt v2, -905 ; bin: c77aa813 - ; sltiu - [-,%x7] v132 = icmp_imm ult v1, 1000 ; bin: 3e853393 - [-,%x16] v133 = icmp_imm ult v2, -905 ; bin: c77ab813 - - ; lui - [-,%x7] v140 = iconst.i32 0x12345000 ; bin: 123453b7 - [-,%x16] v141 = iconst.i32 0xffffffff_fedcb000 ; bin: fedcb837 - ; addi - [-,%x7] v142 = iconst.i32 1000 ; bin: 3e800393 - [-,%x16] v143 = iconst.i32 -905 ; bin: c7700813 - - ; Copies alias to iadd_imm. - [-,%x7] v150 = copy v1 ; bin: 00050393 - [-,%x16] v151 = copy v2 ; bin: 000a8813 - - ; Control Transfer Instructions - - ; jal %x1, fn0 - call fn0() ; bin: Call(%foo) 000000ef - - ; jalr %x1, %x10 - call_indirect sig0, v1() ; bin: 000500e7 - call_indirect sig0, v2() ; bin: 000a80e7 - - brz v1, block3 - fallthrough block4 - -block4: - brnz v1, block1 - fallthrough block5 - -block5: - ; jalr %x0, %x1, 0 - return v9999 ; bin: 00008067 - -block1: - ; beq 0x000 - br_icmp eq v1, v2, block1 ; bin: 01550063 - fallthrough block100 - -block100: - ; bne 0xffc - br_icmp ne v1, v2, block1 ; bin: ff551ee3 - fallthrough block101 - -block101: - ; blt 0xff8 - br_icmp slt v1, v2, block1 ; bin: ff554ce3 - fallthrough block102 - -block102: - ; bge 0xff4 - br_icmp sge v1, v2, block1 ; bin: ff555ae3 - fallthrough block103 - -block103: - ; bltu 0xff0 - br_icmp ult v1, v2, block1 ; bin: ff5568e3 - fallthrough block104 - -block104: - ; bgeu 0xfec - br_icmp uge v1, v2, block1 ; bin: ff5576e3 - fallthrough block105 - -block105: - - ; Forward branches. 
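The beq and bne expectations just above (offsets 0 and -4), and the jal with ; bin: ff5ff06f near the end of this function, are produced by the deleted put_sb and put_uj helpers, which scatter the displacement bits in the order their comments call "completely hashed up". A standalone sketch of the same scattering follows; encode_sb and encode_uj are made-up names that take funct3, registers and the byte offset directly rather than recipe bits.

fn encode_sb(funct3: u32, rs1: u32, rs2: u32, imm: i32) -> u32 {
    let imm = imm as u32;
    let mut i = 0x63; // BRANCH opcode
    i |= funct3 << 12;
    i |= rs1 << 15;
    i |= rs2 << 20;
    i |= ((imm >> 11) & 0x1) << 7;
    i |= ((imm >> 1) & 0xf) << 8;
    i |= ((imm >> 5) & 0x3f) << 25;
    i |= ((imm >> 12) & 0x1) << 31;
    i
}

fn encode_uj(rd: u32, imm: i32) -> u32 {
    let imm = imm as u32;
    let mut i = 0x6f; // JAL opcode
    i |= rd << 7;
    i |= imm & 0xff000;
    i |= ((imm >> 11) & 0x1) << 20;
    i |= ((imm >> 1) & 0x3ff) << 21;
    i |= ((imm >> 20) & 0x1) << 31;
    i
}

fn main() {
    assert_eq!(encode_sb(0b000, 10, 21, 0), 0x0155_0063);  // beq v1, v2 at offset 0
    assert_eq!(encode_sb(0b001, 10, 21, -4), 0xff55_1ee3); // bne v1, v2 at offset -4 (0xffc)
    assert_eq!(encode_uj(0, -12), 0xff5f_f06f);            // jal %x0, 0x1ffff4 (back to block2)
}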
- fallthrough block106 - -block106: - ; beq 0x018 - br_icmp eq v2, v1, block2 ; bin: 00aa8c63 - fallthrough block107 - -block107: - ; bne 0x014 - br_icmp ne v2, v1, block2 ; bin: 00aa9a63 - fallthrough block108 - -block108: - ; blt 0x010 - br_icmp slt v2, v1, block2 ; bin: 00aac863 - fallthrough block109 - -block109: - ; bge 0x00c - br_icmp sge v2, v1, block2 ; bin: 00aad663 - fallthrough block110 - -block110: - ; bltu 0x008 - br_icmp ult v2, v1, block2 ; bin: 00aae463 - fallthrough block111 - -block111: - ; bgeu 0x004 - br_icmp uge v2, v1, block2 ; bin: 00aaf263 - - fallthrough block2 - -block2: - ; jal %x0, 0x00000 - jump block2 ; bin: 0000006f - -block3: - ; beq x, %x0 - brz v1, block3 ; bin: 00050063 - fallthrough block6 - -block6: - ; bne x, %x0 - brnz v1, block3 ; bin: fe051ee3 - - ; jal %x0, 0x1ffff4 - jump block2 ; bin: ff5ff06f -} diff --git a/cranelift/filetests/filetests/isa/riscv/encoding.clif b/cranelift/filetests/filetests/isa/riscv/encoding.clif deleted file mode 100644 index b8c991f52e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/encoding.clif +++ /dev/null @@ -1,21 +0,0 @@ -test legalizer -target riscv32 supports_m=1 - -function %int32(i32, i32) { -block0(v1: i32, v2: i32): - v10 = iadd v1, v2 - ; check: [R#0c] - ; sameln: v10 = iadd - - v11 = isub v1, v2 - ; check: [R#200c] - ; sameln: v11 = isub - - v12 = imul v1, v2 - ; check: [R#10c] - ; sameln: v12 = imul - - return - ; check: [Iret#19] - ; sameln: return -} diff --git a/cranelift/filetests/filetests/isa/riscv/expand-i32.clif b/cranelift/filetests/filetests/isa/riscv/expand-i32.clif deleted file mode 100644 index ee62bc093f..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/expand-i32.clif +++ /dev/null @@ -1,37 +0,0 @@ -; Test the legalization of i32 instructions that don't have RISC-V versions. -test legalizer - -target riscv32 supports_m=1 - -target riscv64 supports_m=1 - -; regex: V=v\d+ - -function %carry_out(i32, i32) -> i32, b1 { -block0(v1: i32, v2: i32): - v3, v4 = iadd_cout v1, v2 - return v3, v4 -} -; check: v3 = iadd v1, v2 -; check: v4 = icmp ult v3, v1 -; check: return v3, v4 - -; Expanding illegal immediate constants. -; Note that at some point we'll probably expand the iconst as well. -function %large_imm(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 1000000000 - return v1 -} -; check: $(cst=$V) = iconst.i32 0x3b9a_ca00 -; check: v1 = iadd v0, $cst -; check: return v1 - -function %bitclear(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = band_not v0, v1 - ; check: iconst.i32 -1 - ; check: bxor - ; check: band - return v2 -} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif b/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif deleted file mode 100644 index 5ea4cd002e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif +++ /dev/null @@ -1,134 +0,0 @@ -; Test legalizer's handling of ABI boundaries. 
-test legalizer -target riscv32 - -; regex: V=v\d+ -; regex: SS=ss\d+ -; regex: WS=\s+ - -function %int_split_args(i64) -> i64 { -block0(v0: i64): - ; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): - ; check: v0 = iconcat $v0l, $v0h - v1 = iadd_imm v0, 1 - ; check: $(v1l=$V), $(v1h=$V) = isplit v1 - ; check: return $v1l, $v1h, $link - return v1 -} - -function %split_call_arg(i32) { - fn1 = %foo(i64) - fn2 = %foo(i32, i64) -block0(v0: i32): - v1 = uextend.i64 v0 - call fn1(v1) - ; check: $(v1h=$V) = iconst.i32 0 - ; check: call fn1(v0, $v1h) - call fn2(v0, v1) - ; check: call fn2(v0, $V, $V) - return -} - -function %split_ret_val() { - fn1 = %foo() -> i64 -block0: - v1 = call fn1() - ; check: block0($(link=$V): i32): - ; nextln: $(v1l=$V), $(v1h=$V) = call fn1() - ; check: v1 = iconcat $v1l, $v1h - jump block1(v1) - ; check: jump block1(v1) - -block1(v10: i64): - jump block1(v10) -} - -; First return value is fine, second one is expanded. -function %split_ret_val2() { - fn1 = %foo() -> i32, i64 -block0: - v1, v2 = call fn1() - ; check: block0($(link=$V): i32): - ; nextln: v1, $(v2l=$V), $(v2h=$V) = call fn1() - ; check: v2 = iconcat $v2l, $v2h - jump block1(v1, v2) - ; check: jump block1(v1, v2) - -block1(v9: i32, v10: i64): - jump block1(v9, v10) -} - -function %int_ext(i8, i8 sext, i8 uext) -> i8 uext { -block0(v1: i8, v2: i8, v3: i8): - ; check: block0(v1: i8, $(v2x=$V): i32, $(v3x=$V): i32, $(link=$V): i32): - ; check: v2 = ireduce.i8 $v2x - ; check: v3 = ireduce.i8 $v3x - ; check: $(v1x=$V) = uextend.i32 v1 - ; check: return $v1x, $link - return v1 -} - -; Function produces single return value, still need to copy. -function %ext_ret_val() { - fn1 = %foo() -> i8 sext -block0: - v1 = call fn1() - ; check: block0($V: i32): - ; nextln: $(rv=$V) = call fn1() - ; check: v1 = ireduce.i8 $rv - jump block1(v1) - ; check: jump block1(v1) - -block1(v10: i8): - jump block1(v10) -} - -function %vector_split_args(i64x4) -> i64x4 { -block0(v0: i64x4): - ; check: block0($(v0al=$V): i32, $(v0ah=$V): i32, $(v0bl=$V): i32, $(v0bh=$V): i32, $(v0cl=$V): i32, $(v0ch=$V): i32, $(v0dl=$V): i32, $(v0dh=$V): i32, $(link=$V): i32): - ; check: $(v0a=$V) = iconcat $v0al, $v0ah - ; check: $(v0b=$V) = iconcat $v0bl, $v0bh - ; check: $(v0ab=$V) = vconcat $v0a, $v0b - ; check: $(v0c=$V) = iconcat $v0cl, $v0ch - ; check: $(v0d=$V) = iconcat $v0dl, $v0dh - ; check: $(v0cd=$V) = vconcat $v0c, $v0d - ; check: v0 = vconcat $v0ab, $v0cd - v1 = bxor v0, v0 - ; check: $(v1ab=$V), $(v1cd=$V) = vsplit v1 - ; check: $(v1a=$V), $(v1b=$V) = vsplit $v1ab - ; check: $(v1al=$V), $(v1ah=$V) = isplit $v1a - ; check: $(v1bl=$V), $(v1bh=$V) = isplit $v1b - ; check: $(v1c=$V), $(v1d=$V) = vsplit $v1cd - ; check: $(v1cl=$V), $(v1ch=$V) = isplit $v1c - ; check: $(v1dl=$V), $(v1dh=$V) = isplit $v1d - ; check: return $v1al, $v1ah, $v1bl, $v1bh, $v1cl, $v1ch, $v1dl, $v1dh, $link - return v1 -} - -function %indirect(i32) { - sig1 = () system_v -block0(v0: i32): - call_indirect sig1, v0() - return -} - -; The first argument to call_indirect doesn't get altered. -function %indirect_arg(i32, f32x2) { - sig1 = (f32x2) system_v -block0(v0: i32, v1: f32x2): - call_indirect sig1, v0(v1) - ; check: call_indirect sig1, v0($V, $V) - return -} - -; Call a function that takes arguments on the stack. 
-function %stack_args(i32) { - ; check: $(ss0=$SS) = outgoing_arg 4 - fn1 = %foo(i64, i64, i64, i64, i32) -block0(v0: i32): - v1 = iconst.i64 1 - call fn1(v1, v1, v1, v1, v0) - ; check: [GPsp#48,$ss0]$WS $(v0s=$V) = spill v0 - ; check: call fn1($(=.*), $v0s) - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif deleted file mode 100644 index 11b31218be..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif +++ /dev/null @@ -1,64 +0,0 @@ -; Test the legalization of i64 arithmetic instructions. -test legalizer -target riscv32 supports_m=1 - -; regex: V=v\d+ - -function %bitwise_and(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = band v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#ec -; sameln: $(v3l=$V) = band $v1l, $v2l -; check: [R#ec -; sameln: $(v3h=$V) = band $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %bitwise_or(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = bor v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#cc -; sameln: $(v3l=$V) = bor $v1l, $v2l -; check: [R#cc -; sameln: $(v3h=$V) = bor $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %bitwise_xor(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = bxor v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#8c -; sameln: $(v3l=$V) = bxor $v1l, $v2l -; check: [R#8c -; sameln: $(v3h=$V) = bxor $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %arith_add(i64, i64) -> i64 { -; Legalizing iadd.i64 requires two steps: -; 1. Narrow to iadd_cout.i32, then -; 2. Expand iadd_cout.i32 since RISC-V has no carry flag. 
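A plain Rust rendering of the expansion the checks below expect, assuming the usual trick of recovering the carry from an unsigned compare of the low-half sum against one addend; the function name is invented and it stands in for the generated iadd / icmp ult / bint / iadd sequence.

fn iadd64_via_i32(a: u64, b: u64) -> u64 {
    let (a_lo, a_hi) = (a as u32, (a >> 32) as u32);
    let (b_lo, b_hi) = (b as u32, (b >> 32) as u32);
    let lo = a_lo.wrapping_add(b_lo); // iadd v1l, v2l
    let carry = (lo < a_lo) as u32;   // icmp ult v3l, v1l  +  bint
    let hi = a_hi.wrapping_add(b_hi).wrapping_add(carry); // iadd v1h, v2h; iadd .., carry
    (u64::from(hi) << 32) | u64::from(lo)
}

fn main() {
    for &(a, b) in &[(u64::MAX, 1), (0x8000_0000, 0x8000_0000), (123, 456)] {
        assert_eq!(iadd64_via_i32(a, b), a.wrapping_add(b));
    }
}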
-block0(v1: i64, v2: i64): - v3 = iadd v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#0c -; sameln: $(v3l=$V) = iadd $v1l, $v2l -; check: $(c=$V) = icmp ult $v3l, $v1l -; check: [R#0c -; sameln: $(v3h1=$V) = iadd $v1h, $v2h -; check: $(c_int=$V) = bint.i32 $c -; check: [R#0c -; sameln: $(v3h=$V) = iadd $v3h1, $c_int -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif deleted file mode 100644 index d7250cb3af..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif +++ /dev/null @@ -1,55 +0,0 @@ -test legalizer -target riscv32 - -; regex: V=v\d+ - -function %icmp_imm_eq(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm eq v0, 0x20202020_10101010 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x1010_1010 -; nextln: $(imm_high=$V) = iconst.i32 0x2020_2020 -; nextln: $(v3=$V) = icmp eq $(v2l), $(imm_low) -; nextln: $(v4=$V) = icmp eq $(v2h), $(imm_high) -; nextln: v1 = band $(v3), $(v4) -; nextln: return v1, $(link) - -function %icmp_imm_ne(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm ne v0, 0x33333333_44444444 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x4444_4444 -; nextln: $(imm_high=$V) = iconst.i32 0x3333_3333 -; nextln: $(v3=$V) = icmp ne $(v2l), $(imm_low) -; nextln: $(v4=$V) = icmp ne $(v2h), $(imm_high) -; nextln: v1 = bor $(v3), $(v4) -; nextln: return v1, $(link) - -function %icmp_imm_sge(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm sge v0, 0x01020304_05060708 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x0506_0708 -; nextln: $(imm_high=$V) = iconst.i32 0x0102_0304 -; nextln: $(v3=$V) = icmp sgt $(v2h), $(imm_high) -; nextln: $(v4=$V) = icmp slt $(v2h), $(imm_high) -; nextln: $(v5=$V) = icmp uge $(v2l), $(imm_low) -; nextln: $(v6=$V) = bnot $v4 -; nextln: $(v7=$V) = band $v6, $v5 -; nextln: v1 = bor $(v3), $(v7) -; nextln: return v1, $(link) diff --git a/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif b/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif deleted file mode 100644 index 21cd828b8a..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif +++ /dev/null @@ -1,36 +0,0 @@ -; Test the parser's support for encoding annotations. 
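The sge case in the legalize-icmp_imm-i64.clif hunk deleted just above combines a signed compare of the high halves with an unsigned compare of the low halves that only matters when the high halves are equal. A short sketch of that combination, with a hypothetical function name, checked against a native 64-bit compare:

fn icmp_sge_i64_via_i32(x: i64, imm: i64) -> bool {
    let (x_lo, x_hi) = (x as u32, (x >> 32) as i32);
    let (imm_lo, imm_hi) = (imm as u32, (imm >> 32) as i32);
    let gt_hi = x_hi > imm_hi;   // icmp sgt v2h, imm_high
    let lt_hi = x_hi < imm_hi;   // icmp slt v2h, imm_high
    let uge_lo = x_lo >= imm_lo; // icmp uge v2l, imm_low
    gt_hi || (!lt_hi && uge_lo)  // bor(sgt, band(bnot(slt), uge))
}

fn main() {
    let imm = 0x0102_0304_0506_0708_i64;
    for &x in &[imm - 1, imm, imm + 1, i64::MIN, i64::MAX, 0] {
        assert_eq!(icmp_sge_i64_via_i32(x, imm), x >= imm);
    }
}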
-test legalizer -target riscv32 - -function %parse_encoding(i32 [%x5]) -> i32 [%x10] { - ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] fast { - - sig0 = (i32 [%x10]) -> i32 [%x10] system_v - ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v - - sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - - sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - - ; Arguments on stack where not necessary - sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v - ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v - - ; Stack argument before register argument - sig4 = (f32 [72], i32 [%x10]) system_v - ; check: sig4 = (f32 [72], i32 [%x10]) system_v - - ; Return value on stack - sig5 = () -> f32 [0] system_v - ; check: sig5 = () -> f32 [0] system_v - - ; function + signature - fn0 = %bar(i32 [%x10]) -> b1 [%x10] system_v - ; check: sig6 = (i32 [%x10]) -> b1 [%x10] system_v - ; nextln: fn0 = %bar sig6 - -block0(v0: i32): - return v0 -} diff --git a/cranelift/filetests/filetests/isa/riscv/regmove.clif b/cranelift/filetests/filetests/isa/riscv/regmove.clif deleted file mode 100644 index f1509e8178..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/regmove.clif +++ /dev/null @@ -1,15 +0,0 @@ -; Test tracking of register moves. -test binemit -target riscv32 - -function %regmoves(i32 link [%x1]) -> i32 link [%x1] { -block0(v9999: i32): - [-,%x10] v1 = iconst.i32 1 - [-,%x7] v2 = iadd_imm v1, 1000 ; bin: 3e850393 - regmove v1, %x10 -> %x11 ; bin: 00050593 - [-,%x7] v3 = iadd_imm v1, 1000 ; bin: 3e858393 - regmove v1, %x11 -> %x10 ; bin: 00058513 - [-,%x7] v4 = iadd_imm v1, 1000 ; bin: 3e850393 - - return v9999 -} diff --git a/cranelift/filetests/filetests/isa/riscv/split-args.clif b/cranelift/filetests/filetests/isa/riscv/split-args.clif deleted file mode 100644 index 9f4b3e9268..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/split-args.clif +++ /dev/null @@ -1,55 +0,0 @@ -; Test the legalization of block arguments that are split. 
-test legalizer -target riscv32 - -; regex: V=v\d+ - -function %simple(i64, i64) -> i64 { -block0(v1: i64, v2: i64): -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): - jump block1(v1) - ; check: jump block1($v1l, $v1h) - -block1(v3: i64): -; check: block1($(v3l=$V): i32, $(v3h=$V): i32): - v4 = band v3, v2 - ; check: $(v4l=$V) = band $v3l, $v2l - ; check: $(v4h=$V) = band $v3h, $v2h - return v4 - ; check: return $v4l, $v4h, $link -} - -function %multi(i64) -> i64 { -block1(v1: i64): -; check: block1($(v1l=$V): i32, $(v1h=$V): i32, $(link=$V): i32): - jump block2(v1, v1) - ; check: jump block2($v1l, $v1l, $v1h, $v1h) - -block2(v2: i64, v3: i64): -; check: block2($(v2l=$V): i32, $(v3l=$V): i32, $(v2h=$V): i32, $(v3h=$V): i32): - jump block3(v2) - ; check: jump block3($v2l, $v2h) - -block3(v4: i64): -; check: block3($(v4l=$V): i32, $(v4h=$V): i32): - v5 = band v4, v3 - ; check: $(v5l=$V) = band $v4l, $v3l - ; check: $(v5h=$V) = band $v4h, $v3h - return v5 - ; check: return $v5l, $v5h, $link -} - -function %loop(i64, i64) -> i64 { -block0(v1: i64, v2: i64): -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): - jump block1(v1) - ; check: jump block1($v1l, $v1h) - -block1(v3: i64): -; check: block1($(v3l=$V): i32, $(v3h=$V): i32): - v4 = band v3, v2 - ; check: $(v4l=$V) = band $v3l, $v2l - ; check: $(v4h=$V) = band $v3h, $v2h - jump block1(v4) - ; check: jump block1($v4l, $v4h) -} diff --git a/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif b/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif deleted file mode 100644 index 1d29b86da9..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif +++ /dev/null @@ -1,21 +0,0 @@ -test verifier -target riscv32 - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - fn0 = %foo() - -block0(v9999: i32): - ; iconst.i32 needs legalizing, so it should throw a - [R#0,-] v1 = iconst.i32 0xf0f0f0f0f0 ; error: Instruction failed to re-encode - [Iret#19] return v9999 -} - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - fn0 = %foo() - -block0(v9999: i32): - v1 = iconst.i32 1 - v2 = iconst.i32 2 - [R#0,-] v3 = iadd v1, v2 ; error: encoding R#00 should be R#0c - [Iret#19] return v9999 -} diff --git a/cranelift/filetests/filetests/parser/instruction_encoding.clif b/cranelift/filetests/filetests/parser/instruction_encoding.clif deleted file mode 100644 index 5386808482..0000000000 --- a/cranelift/filetests/filetests/parser/instruction_encoding.clif +++ /dev/null @@ -1,24 +0,0 @@ -test cat - -target riscv32 - -; regex: WS=[ \t]* - -function %foo(i32, i32) { -block1(v0: i32 [%x8], v1: i32): - [-,-] v2 = iadd v0, v1 - [-] trap heap_oob - [R#1234, %x5, %x11] v6, v7 = iadd_ifcout v2, v0 - [Rshamt#beef, %x25] v8 = ishl_imm v6, 2 -@55 v9 = iadd v8, v7 -@a5 [Iret#5] return v0, v8 -} -; sameln: function %foo(i32, i32) fast { -; nextln: block1(v0: i32 [%x8], v1: i32): -; nextln: [-,-]$WS v2 = iadd v0, v1 -; nextln: [-]$WS trap heap_oob -; nextln: [R#1234,%x5,%x11]$WS v6, v7 = iadd_ifcout v2, v0 -; nextln: [Rshamt#beef,%x25]$WS v8 = ishl_imm v6, 2 -; nextln: @0055 [-,-]$WS v9 = iadd v8, v7 -; nextln: @00a5 [Iret#05]$WS return v0, v8 -; nextln: } diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index 6a7fb5a282..58a321adba 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -3,7 +3,7 @@ use core::mem; use 
cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink}; use cranelift_codegen::data_value::DataValue; use cranelift_codegen::ir::{condcodes::IntCC, Function, InstBuilder, Signature}; -use cranelift_codegen::isa::{BackendVariant, TargetIsa}; +use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::{ir, settings, CodegenError, Context}; use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext}; use cranelift_native::builder_with_options; diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index da593f8679..cd3b8f66df 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -111,28 +111,6 @@ cfg_if! { fn get_disassembler(isa: &dyn TargetIsa) -> Result { let cs = match isa.triple().architecture { - Architecture::Riscv32(_) => { - let mut cs = Capstone::new() - .riscv() - .mode(arch::riscv::ArchMode::RiscV32) - .extra_mode(std::iter::once(arch::riscv::ArchExtraMode::RiscVC)) - .build() - .map_err(map_caperr)?; - // See the comment of AArch64 below - cs.set_skipdata(true).map_err(map_caperr)?; - cs - } - Architecture::Riscv64(_) => { - let mut cs = Capstone::new() - .riscv() - .mode(arch::riscv::ArchMode::RiscV64) - .extra_mode(std::iter::once(arch::riscv::ArchExtraMode::RiscVC)) - .build() - .map_err(map_caperr)?; - // See the comment of AArch64 below - cs.set_skipdata(true).map_err(map_caperr)?; - cs - } Architecture::X86_32(_) => Capstone::new() .x86() .mode(arch::x86::ArchMode::Mode32) diff --git a/cranelift/wasm/Cargo.toml b/cranelift/wasm/Cargo.toml index 31e84a517d..cd45a4faac 100644 --- a/cranelift/wasm/Cargo.toml +++ b/cranelift/wasm/Cargo.toml @@ -26,8 +26,7 @@ smallvec = "1.6.1" [dev-dependencies] wat = "1.0.37" target-lexicon = "0.12" -# Enable the riscv feature for cranelift-codegen, as some tests require it -cranelift-codegen = { path = "../codegen", version = "0.77.0", default-features = false, features = ["riscv"] } +cranelift-codegen = { path = "../codegen", version = "0.77.0", default-features = false } [features] default = ["std"] diff --git a/cranelift/wasm/tests/wasm_testsuite.rs b/cranelift/wasm/tests/wasm_testsuite.rs index a2e0f7db7c..a5200c3d71 100644 --- a/cranelift/wasm/tests/wasm_testsuite.rs +++ b/cranelift/wasm/tests/wasm_testsuite.rs @@ -1,12 +1,11 @@ -use cranelift_codegen::isa; +use cranelift_codegen::isa::{CallConv, TargetFrontendConfig}; use cranelift_codegen::print_errors::pretty_verifier_error; use cranelift_codegen::settings::{self, Flags}; use cranelift_codegen::verifier; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode}; use std::fs; use std::path::Path; -use std::str::FromStr; -use target_lexicon::triple; +use target_lexicon::PointerWidth; #[test] fn testsuite() { @@ -52,11 +51,15 @@ fn use_name_section() { ) .unwrap(); - let flags = Flags::new(settings::builder()); - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); let return_mode = ReturnMode::NormalReturns; - let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut dummy_environ = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U32, + }, + return_mode, + false, + ); translate_module(data.as_ref(), &mut dummy_environ).unwrap(); @@ -82,15 +85,20 @@ fn read_module(path: &Path) -> Vec { } fn handle_module(data: Vec, flags: &Flags, return_mode: ReturnMode) { - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); - let 
mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut dummy_environ = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U64, + }, + return_mode, + false, + ); translate_module(&data, &mut dummy_environ).unwrap(); for func in dummy_environ.info.function_bodies.values() { - verifier::verify_function(func, &*isa) - .map_err(|errors| panic!("{}", pretty_verifier_error(func, Some(&*isa), None, errors))) + verifier::verify_function(func, flags) + .map_err(|errors| panic!("{}", pretty_verifier_error(func, None, None, errors))) .unwrap(); } } @@ -168,10 +176,14 @@ fn reachability_is_correct() { for (return_mode, wat, expected_reachability) in tests { println!("testing wat:\n{}", wat); - let flags = Flags::new(settings::builder()); - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); - let mut env = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut env = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U64, + }, + return_mode, + false, + ); env.test_expected_reachability(expected_reachability); let data = wat::parse_str(wat).unwrap(); translate_module(data.as_ref(), &mut env).unwrap(); From d8818c967ecbad7ba92882d64151c209c70a2aa9 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 21 Jun 2021 12:06:24 +0200 Subject: [PATCH 03/14] Fix all dead-code warnings in cranelift-codegen-meta --- cranelift/codegen/meta/src/cdsl/ast.rs | 6 +- cranelift/codegen/meta/src/cdsl/cpu_modes.rs | 40 +- cranelift/codegen/meta/src/cdsl/encodings.rs | 130 +------ cranelift/codegen/meta/src/cdsl/formats.rs | 12 - .../codegen/meta/src/cdsl/instructions.rs | 341 +----------------- cranelift/codegen/meta/src/cdsl/recipes.rs | 125 ------- cranelift/codegen/meta/src/cdsl/regs.rs | 80 ---- cranelift/codegen/meta/src/cdsl/settings.rs | 8 - cranelift/codegen/meta/src/cdsl/types.rs | 14 - cranelift/codegen/meta/src/cdsl/xform.rs | 6 - 10 files changed, 14 insertions(+), 748 deletions(-) diff --git a/cranelift/codegen/meta/src/cdsl/ast.rs b/cranelift/codegen/meta/src/cdsl/ast.rs index 6bfd1721ae..f4f2afe273 100644 --- a/cranelift/codegen/meta/src/cdsl/ast.rs +++ b/cranelift/codegen/meta/src/cdsl/ast.rs @@ -456,7 +456,7 @@ impl Apply { // Apply should only operate on concrete value types, not "any". let value_types = value_types .into_iter() - .map(|vt| vt.expect("shouldn't be Any")) + .map(|vt| vt.expect()) .collect(); // Basic check on number of arguments. 
@@ -635,10 +635,6 @@ impl Into for Literal { #[derive(Clone)] pub(crate) struct DummyConstant(pub(crate) Vec); -pub(crate) fn constant(data: Vec) -> DummyConstant { - DummyConstant(data) -} - impl Into for DummyConstant { fn into(self) -> DummyExpr { DummyExpr::Constant(self) diff --git a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs index 7d119b00ce..873cfe3d95 100644 --- a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs +++ b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs @@ -2,8 +2,8 @@ use std::collections::{hash_map, HashMap, HashSet}; use std::iter::FromIterator; use crate::cdsl::encodings::Encoding; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; +use crate::cdsl::types::ValueType; +use crate::cdsl::xform::TransformGroupIndex; pub(crate) struct CpuMode { pub name: &'static str, @@ -14,42 +14,6 @@ pub(crate) struct CpuMode { } impl CpuMode { - pub fn new(name: &'static str) -> Self { - Self { - name, - default_legalize: None, - monomorphic_legalize: None, - typed_legalize: HashMap::new(), - encodings: Vec::new(), - } - } - - pub fn set_encodings(&mut self, encodings: Vec) { - assert!(self.encodings.is_empty(), "clobbering encodings"); - self.encodings = encodings; - } - - pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { - assert!(self.monomorphic_legalize.is_none()); - self.monomorphic_legalize = Some(group.id); - } - pub fn legalize_default(&mut self, group: &TransformGroup) { - assert!(self.default_legalize.is_none()); - self.default_legalize = Some(group.id); - } - pub fn legalize_value_type(&mut self, lane_type: impl Into, group: &TransformGroup) { - assert!(self - .typed_legalize - .insert(lane_type.into(), group.id) - .is_none()); - } - pub fn legalize_type(&mut self, lane_type: impl Into, group: &TransformGroup) { - assert!(self - .typed_legalize - .insert(lane_type.into().into(), group.id) - .is_none()); - } - pub fn get_default_legalize_code(&self) -> TransformGroupIndex { self.default_legalize .expect("a finished CpuMode must have a default legalize code") diff --git a/cranelift/codegen/meta/src/cdsl/encodings.rs b/cranelift/codegen/meta/src/cdsl/encodings.rs index 4d11beb206..3474e3dda0 100644 --- a/cranelift/codegen/meta/src/cdsl/encodings.rs +++ b/cranelift/codegen/meta/src/cdsl/encodings.rs @@ -1,12 +1,11 @@ use crate::cdsl::instructions::{ - InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, - InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, + InstSpec, Instruction, + InstructionPredicateNumber }; use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; use crate::cdsl::settings::SettingPredicateNumber; use crate::cdsl::types::ValueType; use std::rc::Rc; -use std::string::ToString; /// Encoding for a concrete instruction. 
/// @@ -51,128 +50,3 @@ impl EncodingContent { pub(crate) type Encoding = Rc; -pub(crate) struct EncodingBuilder { - inst: InstSpec, - recipe: EncodingRecipeNumber, - encbits: u16, - inst_predicate: Option, - isa_predicate: Option, - bound_type: Option, -} - -impl EncodingBuilder { - pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { - let (inst_predicate, bound_type) = match &inst { - InstSpec::Bound(inst) => { - let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; - - assert_eq!( - inst.value_types.len(), - other_typevars.len() + 1, - "partially bound polymorphic instruction" - ); - - // Add secondary type variables to the instruction predicate. - let value_types = &inst.value_types; - let mut inst_predicate: Option = None; - for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { - let value_type = match value_type { - ValueTypeOrAny::Any => continue, - ValueTypeOrAny::ValueType(vt) => vt, - }; - let type_predicate = - InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); - inst_predicate = Some(type_predicate.into()); - } - - // Add immediate value predicates - for (immediate_value, immediate_operand) in inst - .immediate_values - .iter() - .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate())) - { - let immediate_predicate = InstructionPredicate::new_is_field_equal( - &inst.inst.format, - immediate_operand.kind.rust_field_name, - immediate_value.to_string(), - ); - inst_predicate = if let Some(type_predicate) = inst_predicate { - Some(type_predicate.and(immediate_predicate)) - } else { - Some(immediate_predicate.into()) - } - } - - let ctrl_type = value_types[0] - .clone() - .expect("Controlling type shouldn't be Any"); - (inst_predicate, Some(ctrl_type)) - } - - InstSpec::Inst(inst) => { - assert!( - inst.polymorphic_info.is_none(), - "unbound polymorphic instruction" - ); - (None, None) - } - }; - - Self { - inst, - recipe, - encbits, - inst_predicate, - isa_predicate: None, - bound_type, - } - } - - pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { - let inst_predicate = Some(match self.inst_predicate { - Some(node) => node.and(inst_predicate), - None => inst_predicate.into(), - }); - self.inst_predicate = inst_predicate; - self - } - - pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { - assert!(self.isa_predicate.is_none()); - self.isa_predicate = Some(isa_predicate); - self - } - - pub fn build( - self, - recipes: &Recipes, - inst_pred_reg: &mut InstructionPredicateRegistry, - ) -> Encoding { - let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); - - let inst = self.inst.inst(); - assert!( - Rc::ptr_eq(&inst.format, &recipes[self.recipe].format), - "Inst {} and recipe {} must have the same format!", - inst.name, - recipes[self.recipe].name - ); - - assert_eq!( - inst.is_branch && !inst.is_indirect_branch, - recipes[self.recipe].branch_range.is_some(), - "Inst {}'s is_branch contradicts recipe {} branch_range!", - inst.name, - recipes[self.recipe].name - ); - - Rc::new(EncodingContent { - inst: self.inst, - recipe: self.recipe, - encbits: self.encbits, - inst_predicate, - isa_predicate: self.isa_predicate, - bound_type: self.bound_type, - }) - } -} diff --git a/cranelift/codegen/meta/src/cdsl/formats.rs b/cranelift/codegen/meta/src/cdsl/formats.rs index e713a8bccb..d445a4ed7e 100644 --- a/cranelift/codegen/meta/src/cdsl/formats.rs +++ 
b/cranelift/codegen/meta/src/cdsl/formats.rs @@ -70,18 +70,6 @@ impl fmt::Display for InstructionFormat { } impl InstructionFormat { - pub fn imm_by_name(&self, name: &'static str) -> &FormatField { - self.imm_fields - .iter() - .find(|&field| field.member == name) - .unwrap_or_else(|| { - panic!( - "unexpected immediate field named {} in instruction format {}", - name, self.name - ) - }) - } - /// Returns a tuple that uniquely identifies the structure. pub fn structure(&self) -> FormatStructure { FormatStructure { diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs index 489217033a..db8f4b1e4e 100644 --- a/cranelift/codegen/meta/src/cdsl/instructions.rs +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -1,7 +1,5 @@ -use cranelift_codegen_shared::condcodes::IntCC; use cranelift_entity::{entity_impl, PrimaryMap}; -use std::collections::HashMap; use std::fmt; use std::fmt::{Display, Error, Formatter}; use std::rc::Rc; @@ -10,10 +8,9 @@ use crate::cdsl::camel_case; use crate::cdsl::formats::{FormatField, InstructionFormat}; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint; -use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; +use crate::cdsl::types::{LaneType, ReferenceType, ValueType}; use crate::cdsl::typevar::TypeVar; -use crate::shared::formats::Formats; use crate::shared::types::{Bool, Float, Int, Reference}; #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -317,11 +314,6 @@ impl InstructionBuilder { self } - pub fn clobbers_all_regs(mut self, val: bool) -> Self { - self.clobbers_all_regs = val; - self - } - fn build(self, opcode_number: OpcodeNumber) -> Instruction { let operands_in = self.operands_in.unwrap_or_else(Vec::new); let operands_out = self.operands_out.unwrap_or_else(Vec::new); @@ -387,33 +379,20 @@ impl InstructionBuilder { #[derive(Clone)] pub(crate) enum ValueTypeOrAny { ValueType(ValueType), - Any, } impl ValueTypeOrAny { - pub fn expect(self, msg: &str) -> ValueType { + pub fn expect(self) -> ValueType { match self { ValueTypeOrAny::ValueType(vt) => vt, - ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg), } } } -/// The number of bits in the vector -type VectorBitWidth = u64; - /// An parameter used for binding instructions to specific types or values pub(crate) enum BindParameter { - Any, Lane(LaneType), - Vector(LaneType, VectorBitWidth), Reference(ReferenceType), - Immediate(Immediate), -} - -/// Constructor for more easily building vector parameters from any lane type -pub(crate) fn vector(parameter: impl Into, vector_size: VectorBitWidth) -> BindParameter { - BindParameter::Vector(parameter.into(), vector_size) } impl From for BindParameter { @@ -446,22 +425,13 @@ impl From for BindParameter { } } -impl From for BindParameter { - fn from(imm: Immediate) -> Self { - BindParameter::Immediate(imm) - } -} - #[derive(Clone)] -pub(crate) enum Immediate { - // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128). 
- IntCC(IntCC), -} +pub(crate) enum Immediate {} impl Display for Immediate { - fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + fn fmt(&self, _f: &mut Formatter) -> Result<(), Error> { match self { - Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x), + _ => panic!(), } } } @@ -530,28 +500,14 @@ impl Bindable for BoundInstruction { fn bind(&self, parameter: impl Into) -> BoundInstruction { let mut modified = self.clone(); match parameter.into() { - BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any), BindParameter::Lane(lane_type) => modified .value_types .push(ValueTypeOrAny::ValueType(lane_type.into())), - BindParameter::Vector(lane_type, vector_size_in_bits) => { - let num_lanes = vector_size_in_bits / lane_type.lane_bits(); - assert!( - num_lanes >= 2, - "Minimum lane number for bind_vector is 2, found {}.", - num_lanes, - ); - let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); - modified - .value_types - .push(ValueTypeOrAny::ValueType(vector_type)); - } BindParameter::Reference(reference_type) => { modified .value_types .push(ValueTypeOrAny::ValueType(reference_type.into())); } - BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate), } modified.verify_bindings().unwrap(); modified @@ -767,41 +723,6 @@ fn is_ctrl_typevar_candidate( pub(crate) enum FormatPredicateKind { /// Is the field member equal to the expected value (stored here)? IsEqual(String), - - /// Is the immediate instruction format field representable as an n-bit two's complement - /// integer? (with width: first member, scale: second member). - /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a - /// multiple of `2^scale`. - IsSignedInt(usize, usize), - - /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with - /// width: first member, scale: second member). - /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of - /// `2^scale`. - IsUnsignedInt(usize, usize), - - /// Is the immediate format field member an integer equal to zero? - IsZeroInt, - /// Is the immediate format field member equal to zero? (float32 version) - IsZero32BitFloat, - - /// Is the immediate format field member equal to zero? (float64 version) - IsZero64BitFloat, - - /// Is the immediate format field member equal zero in all lanes? - IsAllZeroes, - - /// Does the immediate format field member have ones in all bits of all lanes? - IsAllOnes, - - /// Has the value list (in member_name) the size specified in parameter? - LengthEquals(usize), - - /// Is the referenced function colocated? - IsColocatedFunc, - - /// Is the referenced data object colocated? - IsColocatedData, } #[derive(Clone, Hash, PartialEq, Eq)] @@ -812,19 +733,6 @@ pub(crate) struct FormatPredicateNode { } impl FormatPredicateNode { - fn new( - format: &InstructionFormat, - field_name: &'static str, - kind: FormatPredicateKind, - ) -> Self { - let member_name = format.imm_by_name(field_name).member; - Self { - format_name: format.name, - member_name, - kind, - } - } - fn new_raw( format: &InstructionFormat, member_name: &'static str, @@ -839,11 +747,6 @@ impl FormatPredicateNode { fn destructuring_member_name(&self) -> &'static str { match &self.kind { - FormatPredicateKind::LengthEquals(_) => { - // Length operates on the argument value list. 
- assert!(self.member_name == "args"); - "ref args" - } _ => self.member_name, } } @@ -853,41 +756,6 @@ impl FormatPredicateNode { FormatPredicateKind::IsEqual(arg) => { format!("predicates::is_equal({}, {})", self.member_name, arg) } - FormatPredicateKind::IsSignedInt(width, scale) => format!( - "predicates::is_signed_int({}, {}, {})", - self.member_name, width, scale - ), - FormatPredicateKind::IsUnsignedInt(width, scale) => format!( - "predicates::is_unsigned_int({}, {}, {})", - self.member_name, width, scale - ), - FormatPredicateKind::IsZeroInt => { - format!("predicates::is_zero_int({})", self.member_name) - } - FormatPredicateKind::IsZero32BitFloat => { - format!("predicates::is_zero_32_bit_float({})", self.member_name) - } - FormatPredicateKind::IsZero64BitFloat => { - format!("predicates::is_zero_64_bit_float({})", self.member_name) - } - FormatPredicateKind::IsAllZeroes => format!( - "predicates::is_all_zeroes(func.dfg.constants.get({}))", - self.member_name - ), - FormatPredicateKind::IsAllOnes => format!( - "predicates::is_all_ones(func.dfg.constants.get({}))", - self.member_name - ), - FormatPredicateKind::LengthEquals(num) => format!( - "predicates::has_length_of({}, {}, func)", - self.member_name, num - ), - FormatPredicateKind::IsColocatedFunc => { - format!("predicates::is_colocated_func({}, func)", self.member_name,) - } - FormatPredicateKind::IsColocatedData => { - format!("predicates::is_colocated_data({}, func)", self.member_name) - } } } } @@ -926,9 +794,6 @@ pub(crate) enum InstructionPredicateNode { /// An AND-combination of two or more other predicates. And(Vec), - - /// An OR-combination of two or more other predicates. - Or(Vec), } impl InstructionPredicateNode { @@ -941,11 +806,6 @@ impl InstructionPredicateNode { .map(|x| x.rust_predicate(func_str)) .collect::>() .join(" && "), - InstructionPredicateNode::Or(nodes) => nodes - .iter() - .map(|x| x.rust_predicate(func_str)) - .collect::>() - .join(" || "), } } @@ -967,9 +827,9 @@ impl InstructionPredicateNode { pub fn is_type_predicate(&self) -> bool { match self { - InstructionPredicateNode::FormatPredicate(_) - | InstructionPredicateNode::And(_) - | InstructionPredicateNode::Or(_) => false, + InstructionPredicateNode::FormatPredicate(_) | InstructionPredicateNode::And(_) => { + false + } InstructionPredicateNode::TypePredicate(_) => true, } } @@ -977,7 +837,7 @@ impl InstructionPredicateNode { fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { let mut ret = Vec::new(); match self { - InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { + InstructionPredicateNode::And(nodes) => { for node in nodes { ret.extend(node.collect_leaves()); } @@ -1004,10 +864,6 @@ impl InstructionPredicate { Self { node: None } } - pub fn unwrap(self) -> InstructionPredicateNode { - self.node.unwrap() - } - pub fn new_typevar_check( inst: &Instruction, type_var: &TypeVar, @@ -1032,18 +888,6 @@ impl InstructionPredicate { )) } - pub fn new_is_field_equal( - format: &InstructionFormat, - field_name: &'static str, - imm_value: String, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsEqual(imm_value), - )) - } - /// Used only for the AST module, which directly passes in the format field. 
pub fn new_is_field_equal_ast( format: &InstructionFormat, @@ -1057,127 +901,11 @@ impl InstructionPredicate { )) } - pub fn new_is_signed_int( - format: &InstructionFormat, - field_name: &'static str, - width: usize, - scale: usize, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsSignedInt(width, scale), - )) - } - - pub fn new_is_unsigned_int( - format: &InstructionFormat, - field_name: &'static str, - width: usize, - scale: usize, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsUnsignedInt(width, scale), - )) - } - - pub fn new_is_zero_int( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZeroInt, - )) - } - - pub fn new_is_zero_32bit_float( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZero32BitFloat, - )) - } - - pub fn new_is_zero_64bit_float( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZero64BitFloat, - )) - } - - pub fn new_is_all_zeroes( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsAllZeroes, - )) - } - - pub fn new_is_all_ones( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsAllOnes, - )) - } - - pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { - assert!( - format.has_value_list, - "the format must be variadic in number of arguments" - ); - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( - format, - "args", - FormatPredicateKind::LengthEquals(size), - )) - } - - pub fn new_is_colocated_func( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsColocatedFunc, - )) - } - - pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode { - let format = &formats.unary_global_value; - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - &*format, - "global_value", - FormatPredicateKind::IsColocatedData, - )) - } - pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { let node = self.node; let mut and_nodes = match node { Some(node) => match node { InstructionPredicateNode::And(nodes) => nodes, - InstructionPredicateNode::Or(_) => { - panic!("Can't mix and/or without implementing operator precedence!") - } _ => vec![node], }, _ => Vec::new(), @@ -1187,23 +915,6 @@ impl InstructionPredicate { self } - pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { - let node = self.node; - let mut or_nodes = match node { - Some(node) => match node { - InstructionPredicateNode::Or(nodes) => nodes, - 
InstructionPredicateNode::And(_) => { - panic!("Can't mix and/or without implementing operator precedence!") - } - _ => vec![node], - }, - _ => Vec::new(), - }; - or_nodes.push(new_node); - self.node = Some(InstructionPredicateNode::Or(or_nodes)); - self - } - pub fn rust_predicate(&self, func_str: &str) -> Option { self.node.as_ref().map(|root| root.rust_predicate(func_str)) } @@ -1232,40 +943,6 @@ entity_impl!(InstructionPredicateNumber); pub(crate) type InstructionPredicateMap = PrimaryMap; -/// A registry of predicates to help deduplicating them, during Encodings construction. When the -/// construction process is over, it needs to be extracted with `extract` and associated to the -/// TargetIsa. -pub(crate) struct InstructionPredicateRegistry { - /// Maps a predicate number to its actual predicate. - map: InstructionPredicateMap, - - /// Inverse map: maps a predicate to its predicate number. This is used before inserting a - /// predicate, to check whether it already exists. - inverted_map: HashMap, -} - -impl InstructionPredicateRegistry { - pub fn new() -> Self { - Self { - map: PrimaryMap::new(), - inverted_map: HashMap::new(), - } - } - pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { - match self.inverted_map.get(&predicate) { - Some(&found) => found, - None => { - let key = self.map.push(predicate.clone()); - self.inverted_map.insert(predicate, key); - key - } - } - } - pub fn extract(self) -> InstructionPredicateMap { - self.map - } -} - /// An instruction specification, containing an instruction that has bound types or not. pub(crate) enum InstSpec { Inst(Instruction), diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs index e03b951f4d..36036fc085 100644 --- a/cranelift/codegen/meta/src/cdsl/recipes.rs +++ b/cranelift/codegen/meta/src/cdsl/recipes.rs @@ -23,12 +23,6 @@ pub(crate) struct Register { pub unit: u8, } -impl Register { - pub fn new(regclass: RegClassIndex, unit: u8) -> Self { - Self { regclass, unit } - } -} - /// An operand that must be in a stack slot. /// /// A `Stack` object can be used to indicate an operand constraint for a value @@ -39,9 +33,6 @@ pub(crate) struct Stack { } impl Stack { - pub fn new(regclass: RegClassIndex) -> Self { - Self { regclass } - } pub fn stack_base_mask(self) -> &'static str { // TODO: Make this configurable instead of just using the SP. "StackBaseMask(1)" @@ -179,119 +170,3 @@ pub(crate) struct EncodingRecipeBuilder { inst_predicate: Option, isa_predicate: Option, } - -impl EncodingRecipeBuilder { - pub fn new(name: impl Into, format: &Rc, base_size: u64) -> Self { - Self { - name: name.into(), - format: format.clone(), - base_size, - operands_in: None, - operands_out: None, - compute_size: None, - branch_range: None, - emit: None, - clobbers_flags: None, - inst_predicate: None, - isa_predicate: None, - } - } - - // Setters. 
- pub fn operands_in(mut self, constraints: Vec>) -> Self { - assert!(self.operands_in.is_none()); - self.operands_in = Some( - constraints - .into_iter() - .map(|constr| constr.into()) - .collect(), - ); - self - } - pub fn operands_out(mut self, constraints: Vec>) -> Self { - assert!(self.operands_out.is_none()); - self.operands_out = Some( - constraints - .into_iter() - .map(|constr| constr.into()) - .collect(), - ); - self - } - pub fn clobbers_flags(mut self, flag: bool) -> Self { - assert!(self.clobbers_flags.is_none()); - self.clobbers_flags = Some(flag); - self - } - pub fn emit(mut self, code: impl Into) -> Self { - assert!(self.emit.is_none()); - self.emit = Some(code.into()); - self - } - pub fn branch_range(mut self, range: (u64, u64)) -> Self { - assert!(self.branch_range.is_none()); - self.branch_range = Some(BranchRange { - inst_size: range.0, - range: range.1, - }); - self - } - pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self { - assert!(self.isa_predicate.is_none()); - self.isa_predicate = Some(pred); - self - } - pub fn inst_predicate(mut self, inst_predicate: impl Into) -> Self { - assert!(self.inst_predicate.is_none()); - self.inst_predicate = Some(inst_predicate.into()); - self - } - pub fn compute_size(mut self, compute_size: &'static str) -> Self { - assert!(self.compute_size.is_none()); - self.compute_size = Some(compute_size); - self - } - - pub fn build(self) -> EncodingRecipe { - let operands_in = self.operands_in.unwrap_or_default(); - let operands_out = self.operands_out.unwrap_or_default(); - - // The number of input constraints must match the number of format input operands. - if !self.format.has_value_list { - assert!( - operands_in.len() == self.format.num_value_operands, - "missing operand constraints for recipe {} (format {})", - self.name, - self.format.name - ); - } - - // Ensure tied inputs actually refer to existing inputs. - for constraint in operands_in.iter().chain(operands_out.iter()) { - if let OperandConstraint::TiedInput(n) = *constraint { - assert!(n < operands_in.len()); - } - } - - let compute_size = match self.compute_size { - Some(compute_size) => compute_size, - None => "base_size", - }; - - let clobbers_flags = self.clobbers_flags.unwrap_or(true); - - EncodingRecipe { - name: self.name, - format: self.format, - base_size: self.base_size, - operands_in, - operands_out, - compute_size, - branch_range: self.branch_range, - clobbers_flags, - inst_predicate: self.inst_predicate, - isa_predicate: self.isa_predicate, - emit: self.emit, - } - } -} diff --git a/cranelift/codegen/meta/src/cdsl/regs.rs b/cranelift/codegen/meta/src/cdsl/regs.rs index 864826ee43..11e1d83dd6 100644 --- a/cranelift/codegen/meta/src/cdsl/regs.rs +++ b/cranelift/codegen/meta/src/cdsl/regs.rs @@ -39,37 +39,6 @@ impl RegBank { classes: Vec::new(), } } - - fn unit_by_name(&self, name: &'static str) -> u8 { - let unit = if let Some(found) = self.names.iter().position(|®_name| reg_name == name) { - found - } else { - // Try to match without the bank prefix. - assert!(name.starts_with(self.prefix)); - let name_without_prefix = &name[self.prefix.len()..]; - if let Some(found) = self - .names - .iter() - .position(|®_name| reg_name == name_without_prefix) - { - found - } else { - // Ultimate try: try to parse a number and use this in the array, eg r15 on x86. 
- if let Ok(as_num) = name_without_prefix.parse::() { - assert!( - as_num < self.units, - "trying to get {}, but bank only has {} registers!", - name, - self.units - ); - as_num as usize - } else { - panic!("invalid register name {}", name); - } - } - }; - self.first_unit + (unit as u8) - } } #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] @@ -132,7 +101,6 @@ impl RegClass { pub(crate) enum RegClassProto { TopLevel(RegBankIndex), - SubClass(RegClassIndex), } pub(crate) struct RegClassBuilder { @@ -153,21 +121,6 @@ impl RegClassBuilder { proto: RegClassProto::TopLevel(bank), } } - pub fn subclass_of( - name: &'static str, - parent_index: RegClassIndex, - start: u8, - stop: u8, - ) -> Self { - assert!(stop >= start); - Self { - name, - width: 0, - count: stop - start, - start, - proto: RegClassProto::SubClass(parent_index), - } - } pub fn count(mut self, count: u8) -> Self { self.count = count; self @@ -175,7 +128,6 @@ impl RegClassBuilder { pub fn width(mut self, width: u8) -> Self { match self.proto { RegClassProto::TopLevel(_) => self.width = width, - RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."), } self } @@ -213,11 +165,6 @@ impl RegBankBuilder { self.pressure_tracking = Some(track); self } - pub fn pinned_reg(mut self, unit: u16) -> Self { - assert!(unit < u16::from(self.units)); - self.pinned_reg = Some(unit); - self - } } pub(crate) struct IsaRegsBuilder { @@ -274,20 +221,6 @@ impl IsaRegsBuilder { .push(class_index); (bank_index, class_index, builder.start, builder.width) } - RegClassProto::SubClass(parent_class_index) => { - assert!(builder.width == 0); - let (bank, toprc, start, width) = { - let parent = self.classes.get(parent_class_index).unwrap(); - (parent.bank, parent.toprc, parent.start, parent.width) - }; - for reg_class in self.classes.values_mut() { - if reg_class.toprc == toprc { - reg_class.subclasses.push(class_index); - } - } - let subclass_start = start + builder.start * width; - (bank, toprc, subclass_start, width) - } }; let reg_bank_units = self.banks.get(bank).unwrap().units; @@ -396,17 +329,4 @@ impl IsaRegs { ) -> Self { Self { banks, classes } } - - pub fn class_by_name(&self, name: &str) -> RegClassIndex { - self.classes - .values() - .find(|&class| class.name == name) - .unwrap_or_else(|| panic!("register class {} not found", name)) - .index - } - - pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 { - let bank_index = self.classes.get(class_index).unwrap().bank; - self.banks.get(bank_index).unwrap().unit_by_name(name) - } } diff --git a/cranelift/codegen/meta/src/cdsl/settings.rs b/cranelift/codegen/meta/src/cdsl/settings.rs index 52c51d54a8..c4e76b760f 100644 --- a/cranelift/codegen/meta/src/cdsl/settings.rs +++ b/cranelift/codegen/meta/src/cdsl/settings.rs @@ -150,14 +150,6 @@ impl SettingGroup { } panic!("Should have found bool setting by name."); } - - pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber { - self.predicates - .iter() - .find(|pred| pred.name == name) - .unwrap_or_else(|| panic!("unknown predicate {}", name)) - .number - } } /// This is the basic information needed to track the specific parts of a setting when building diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs index 7e03c873db..18657a2e26 100644 --- a/cranelift/codegen/meta/src/cdsl/types.rs +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -237,20 +237,6 @@ impl LaneType { ValueType::Vector(VectorType::new(self, 
lanes.into())) } } - - pub fn is_float(self) -> bool { - match self { - LaneType::Float(_) => true, - _ => false, - } - } - - pub fn is_int(self) -> bool { - match self { - LaneType::Int(_) => true, - _ => false, - } - } } impl fmt::Display for LaneType { diff --git a/cranelift/codegen/meta/src/cdsl/xform.rs b/cranelift/codegen/meta/src/cdsl/xform.rs index 95b7af867c..da0cc5ba71 100644 --- a/cranelift/codegen/meta/src/cdsl/xform.rs +++ b/cranelift/codegen/meta/src/cdsl/xform.rs @@ -384,12 +384,6 @@ impl TransformGroupBuilder { self } - pub fn isa(mut self, isa_name: &'static str) -> Self { - assert!(self.isa_name.is_none()); - self.isa_name = Some(isa_name); - self - } - /// Add a custom legalization action for `inst`. /// /// The `func_name` parameter is the fully qualified name of a Rust function which takes the From d499933612cfa573f22602c6cb7c7d94da62dc4a Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 21 Jun 2021 12:13:20 +0200 Subject: [PATCH 04/14] Remove encoding generation from cranelift-codegen-meta --- cranelift/codegen/meta/src/cdsl/cpu_modes.rs | 24 +- cranelift/codegen/meta/src/cdsl/encodings.rs | 52 - .../codegen/meta/src/cdsl/instructions.rs | 60 - cranelift/codegen/meta/src/cdsl/isa.rs | 7 - cranelift/codegen/meta/src/cdsl/mod.rs | 1 - cranelift/codegen/meta/src/cdsl/recipes.rs | 7 - cranelift/codegen/meta/src/gen_binemit.rs | 224 ---- cranelift/codegen/meta/src/gen_encodings.rs | 1139 ----------------- cranelift/codegen/meta/src/lib.rs | 16 - 9 files changed, 1 insertion(+), 1529 deletions(-) delete mode 100644 cranelift/codegen/meta/src/cdsl/encodings.rs delete mode 100644 cranelift/codegen/meta/src/gen_binemit.rs delete mode 100644 cranelift/codegen/meta/src/gen_encodings.rs diff --git a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs index 873cfe3d95..e42a27da2a 100644 --- a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs +++ b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs @@ -1,7 +1,6 @@ -use std::collections::{hash_map, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::iter::FromIterator; -use crate::cdsl::encodings::Encoding; use crate::cdsl::types::ValueType; use crate::cdsl::xform::TransformGroupIndex; @@ -10,30 +9,9 @@ pub(crate) struct CpuMode { default_legalize: Option, monomorphic_legalize: Option, typed_legalize: HashMap, - pub encodings: Vec, } impl CpuMode { - pub fn get_default_legalize_code(&self) -> TransformGroupIndex { - self.default_legalize - .expect("a finished CpuMode must have a default legalize code") - } - pub fn get_legalize_code_for(&self, typ: &Option) -> TransformGroupIndex { - match typ { - Some(typ) => self - .typed_legalize - .get(typ) - .copied() - .unwrap_or_else(|| self.get_default_legalize_code()), - None => self - .monomorphic_legalize - .unwrap_or_else(|| self.get_default_legalize_code()), - } - } - pub fn get_legalized_types(&self) -> hash_map::Keys { - self.typed_legalize.keys() - } - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly /// reachable set of TransformGroup this TargetIsa uses. 
pub fn direct_transform_groups(&self) -> Vec { diff --git a/cranelift/codegen/meta/src/cdsl/encodings.rs b/cranelift/codegen/meta/src/cdsl/encodings.rs deleted file mode 100644 index 3474e3dda0..0000000000 --- a/cranelift/codegen/meta/src/cdsl/encodings.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::cdsl::instructions::{ - InstSpec, Instruction, - InstructionPredicateNumber -}; -use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::SettingPredicateNumber; -use crate::cdsl::types::ValueType; -use std::rc::Rc; - -/// Encoding for a concrete instruction. -/// -/// An `Encoding` object ties an instruction opcode with concrete type variables together with an -/// encoding recipe and encoding encbits. -/// -/// The concrete instruction can be in three different forms: -/// -/// 1. A naked opcode: `trap` for non-polymorphic instructions. -/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. -/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. -/// -/// If the instruction is polymorphic, all type variables must be provided. -pub(crate) struct EncodingContent { - /// The `Instruction` or `BoundInstruction` being encoded. - inst: InstSpec, - - /// The `EncodingRecipe` to use. - pub recipe: EncodingRecipeNumber, - - /// Additional encoding bits to be interpreted by `recipe`. - pub encbits: u16, - - /// An instruction predicate that must be true to allow selecting this encoding. - pub inst_predicate: Option, - - /// An ISA predicate that must be true to allow selecting this encoding. - pub isa_predicate: Option, - - /// The value type this encoding has been bound to, for encodings of polymorphic instructions. - pub bound_type: Option, -} - -impl EncodingContent { - pub fn inst(&self) -> &Instruction { - self.inst.inst() - } - pub fn to_rust_comment(&self, recipes: &Recipes) -> String { - format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) - } -} - -pub(crate) type Encoding = Rc; - diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs index db8f4b1e4e..f7258ea300 100644 --- a/cranelift/codegen/meta/src/cdsl/instructions.rs +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -745,12 +745,6 @@ impl FormatPredicateNode { } } - fn destructuring_member_name(&self) -> &'static str { - match &self.kind { - _ => self.member_name, - } - } - fn rust_predicate(&self) -> String { match &self.kind { FormatPredicateKind::IsEqual(arg) => { @@ -808,44 +802,6 @@ impl InstructionPredicateNode { .join(" && "), } } - - pub fn format_destructuring_member_name(&self) -> &str { - match self { - InstructionPredicateNode::FormatPredicate(format_pred) => { - format_pred.destructuring_member_name() - } - _ => panic!("Only for leaf format predicates"), - } - } - - pub fn format_name(&self) -> &str { - match self { - InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, - _ => panic!("Only for leaf format predicates"), - } - } - - pub fn is_type_predicate(&self) -> bool { - match self { - InstructionPredicateNode::FormatPredicate(_) | InstructionPredicateNode::And(_) => { - false - } - InstructionPredicateNode::TypePredicate(_) => true, - } - } - - fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { - let mut ret = Vec::new(); - match self { - InstructionPredicateNode::And(nodes) => { - for node in nodes { - ret.extend(node.collect_leaves()); - } - } - _ => ret.push(self), - } - ret - } } #[derive(Clone, Hash, PartialEq, Eq)] @@ -918,22 +874,6 @@ 
impl InstructionPredicate { pub fn rust_predicate(&self, func_str: &str) -> Option { self.node.as_ref().map(|root| root.rust_predicate(func_str)) } - - /// Returns the type predicate if this is one, or None otherwise. - pub fn type_predicate(&self, func_str: &str) -> Option { - let node = self.node.as_ref().unwrap(); - if node.is_type_predicate() { - Some(node.rust_predicate(func_str)) - } else { - None - } - } - - /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening - /// AND/OR). - pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { - self.node.as_ref().unwrap().collect_leaves() - } } #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] diff --git a/cranelift/codegen/meta/src/cdsl/isa.rs b/cranelift/codegen/meta/src/cdsl/isa.rs index f6ac9f2493..28844c97fb 100644 --- a/cranelift/codegen/meta/src/cdsl/isa.rs +++ b/cranelift/codegen/meta/src/cdsl/isa.rs @@ -86,11 +86,4 @@ impl TargetIsa { pub fn direct_transform_groups(&self) -> &Vec { &self.local_transform_groups } - - pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { - self.local_transform_groups - .iter() - .position(|&val| val == group_index) - .expect("TransformGroup unused by this TargetIsa!") - } } diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs index 698b64dff3..bd08ebfe41 100644 --- a/cranelift/codegen/meta/src/cdsl/mod.rs +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -6,7 +6,6 @@ #[macro_use] pub mod ast; pub mod cpu_modes; -pub mod encodings; pub mod formats; pub mod instructions; pub mod isa; diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs index 36036fc085..ea9ce5f20b 100644 --- a/cranelift/codegen/meta/src/cdsl/recipes.rs +++ b/cranelift/codegen/meta/src/cdsl/recipes.rs @@ -32,13 +32,6 @@ pub(crate) struct Stack { pub regclass: RegClassIndex, } -impl Stack { - pub fn stack_base_mask(self) -> &'static str { - // TODO: Make this configurable instead of just using the SP. - "StackBaseMask(1)" - } -} - #[derive(Clone, Hash, PartialEq)] pub(crate) struct BranchRange { pub inst_size: u64, diff --git a/cranelift/codegen/meta/src/gen_binemit.rs b/cranelift/codegen/meta/src/gen_binemit.rs deleted file mode 100644 index f67aa9b5a9..0000000000 --- a/cranelift/codegen/meta/src/gen_binemit.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Generate binary emission code for each ISA. - -use cranelift_entity::EntityRef; - -use crate::error; -use crate::srcgen::Formatter; - -use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes}; - -/// Generate code to handle a single recipe. -/// -/// - Unpack the instruction data, knowing the format. -/// - Determine register locations for operands with register constraints. -/// - Determine stack slot locations for operands with stack constraints. -/// - Call hand-written code for the actual emission. -fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) { - let inst_format = &recipe.format; - let num_value_ops = inst_format.num_value_operands; - - // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value - // list. 
- let want_args = inst_format.has_value_list - || recipe.operands_in.iter().any(|c| match c { - OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, - OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, - }); - assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list); - - let want_outs = recipe.operands_out.iter().any(|c| match c { - OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, - OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, - }); - - let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name); - - // Unpack the instruction data. - fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name); - fmt.indent(|fmt| { - fmt.line("opcode,"); - for f in &inst_format.imm_fields { - fmtln!(fmt, "{},", f.member); - } - if want_args { - if inst_format.has_value_list || num_value_ops > 1 { - fmt.line("ref args,"); - } else { - fmt.line("arg,"); - } - } - fmt.line(".."); - - fmt.outdented_line("} = *inst_data {"); - - // Pass recipe arguments in this order: inputs, imm_fields, outputs. - let mut args = String::new(); - - if want_args && !is_regmove { - if inst_format.has_value_list { - fmt.line("let args = args.as_slice(&func.dfg.value_lists);"); - } else if num_value_ops == 1 { - fmt.line("let args = [arg];"); - } - args += &unwrap_values(&recipe.operands_in, "in", "args", fmt); - } - - for f in &inst_format.imm_fields { - args += &format!(", {}", f.member); - } - - // Unwrap interesting output arguments. - if want_outs { - if recipe.operands_out.len() == 1 { - fmt.line("let results = [func.dfg.first_result(inst)];") - } else { - fmt.line("let results = func.dfg.inst_results(inst);"); - } - args += &unwrap_values(&recipe.operands_out, "out", "results", fmt); - } - - // Optimization: Only update the register diversion tracker for regmove instructions. - if is_regmove { - fmt.line("divert.apply(inst_data);") - } - - match &recipe.emit { - Some(emit) => { - fmt.multi_line(emit); - fmt.line("return;"); - } - None => { - fmtln!( - fmt, - "return recipe_{}(func, inst, sink, bits{});", - recipe.name.to_lowercase(), - args - ); - } - } - }); - fmt.line("}"); -} - -/// Emit code that unwraps values living in registers or stack slots. -/// -/// :param args: Input or output constraints. -/// :param prefix: Prefix to be used for the generated local variables. -/// :param values: Name of slice containing the values to be unwrapped. 
-/// :returns: Comma separated list of the generated variables -fn unwrap_values( - args: &[OperandConstraint], - prefix: &str, - values_slice: &str, - fmt: &mut Formatter, -) -> String { - let mut varlist = String::new(); - for (i, cst) in args.iter().enumerate() { - match cst { - OperandConstraint::RegClass(_reg_class) => { - let v = format!("{}_reg{}", prefix, i); - varlist += &format!(", {}", v); - fmtln!( - fmt, - "let {} = divert.reg({}[{}], &func.locations);", - v, - values_slice, - i - ); - } - OperandConstraint::Stack(stack) => { - let v = format!("{}_stk{}", prefix, i); - varlist += &format!(", {}", v); - fmtln!(fmt, "let {} = StackRef::masked(", v); - fmt.indent(|fmt| { - fmtln!( - fmt, - "divert.stack({}[{}], &func.locations),", - values_slice, - i - ); - fmt.line(format!("{},", stack.stack_base_mask())); - fmt.line("&func.stack_slots,"); - }); - fmt.line(").unwrap();"); - } - _ => {} - } - } - varlist -} - -fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) { - fmt.doc_comment(format!( - "Emit binary machine code for `inst` for the {} ISA.", - isa_name - )); - - if recipes.is_empty() { - fmt.line("pub fn emit_inst("); - fmt.indent(|fmt| { - fmt.line("func: &Function,"); - fmt.line("inst: Inst,"); - fmt.line("_divert: &mut RegDiversions,"); - fmt.line("_sink: &mut CS,"); - fmt.line("_isa: &dyn TargetIsa,"); - }); - fmt.line(") {"); - fmt.indent(|fmt| { - // No encoding recipes: Emit a stub. - fmt.line("bad_encoding(func, inst)"); - }); - fmt.line("}"); - return; - } - - fmt.line("#[allow(unused_variables, unreachable_code)]"); - fmt.line("pub fn emit_inst("); - fmt.indent(|fmt| { - fmt.line("func: &Function,"); - fmt.line("inst: Inst,"); - fmt.line("divert: &mut RegDiversions,"); - fmt.line("sink: &mut CS,"); - fmt.line("isa: &dyn TargetIsa,") - }); - - fmt.line(") {"); - fmt.indent(|fmt| { - fmt.line("let encoding = func.encodings[inst];"); - fmt.line("let bits = encoding.bits();"); - fmt.line("let inst_data = &func.dfg[inst];"); - fmt.line("match encoding.recipe() {"); - fmt.indent(|fmt| { - for (i, recipe) in recipes.iter() { - fmt.comment(format!("Recipe {}", recipe.name)); - fmtln!(fmt, "{} => {{", i.index()); - fmt.indent(|fmt| { - gen_recipe(recipe, fmt); - }); - fmt.line("}"); - } - fmt.line("_ => {},"); - }); - fmt.line("}"); - - // Allow for unencoded ghost instructions. The verifier will check details. - fmt.line("if encoding.is_legal() {"); - fmt.indent(|fmt| { - fmt.line("bad_encoding(func, inst);"); - }); - fmt.line("}"); - }); - fmt.line("}"); -} - -pub(crate) fn generate( - isa_name: &str, - recipes: &Recipes, - binemit_filename: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(isa_name, recipes, &mut fmt); - fmt.update_file(binemit_filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/gen_encodings.rs b/cranelift/codegen/meta/src/gen_encodings.rs deleted file mode 100644 index d7bb289bd2..0000000000 --- a/cranelift/codegen/meta/src/gen_encodings.rs +++ /dev/null @@ -1,1139 +0,0 @@ -//! Generate sources for instruction encoding. -//! -//! The tables and functions generated here support the `TargetISA::encode()` function which -//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a -//! *recipe* and some *encoding* bits. -//! -//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a -//! corresponding hand-written function to do that after registers are allocated. -//! -//! 
This is the information available to us: -//! -//! - The instruction to be encoded as an `InstructionData` reference. -//! - The controlling type variable. -//! - The data-flow graph giving us access to the types of all values involved. This is needed for -//! testing any secondary type variables. -//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. -//! - The currently active CPU mode is determined by the ISA. -//! -//! ## Level 1 table lookup -//! -//! The CPU mode provides the first table. The key is the instruction's controlling type variable. -//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values -//! are level 2 tables. -//! -//! ## Level 2 table lookup -//! -//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. -//! -//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. -//! Code in any given function usually only uses a few different types, so many of the level 2 -//! tables will be cold. -//! -//! ## Encoding lists -//! -//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: -//! -//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. -//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. -//! Otherwise, stop with the default legalization code. -//! 3. Stop with legalization code. -//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. -//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. -//! -//! The instruction predicate is also used to distinguish between polymorphic instructions with -//! different types for secondary type variables. - -use std::collections::btree_map; -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::convert::TryFrom; -use std::iter::FromIterator; - -use cranelift_codegen_shared::constant_hash::generate_table; -use cranelift_entity::EntityRef; - -use crate::error; -use crate::srcgen::Formatter; - -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::encodings::Encoding; -use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber}; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register}; -use crate::cdsl::regs::IsaRegs; -use crate::cdsl::settings::SettingPredicateNumber; -use crate::cdsl::types::ValueType; -use crate::cdsl::xform::TransformGroupIndex; - -use crate::shared::Definitions as SharedDefinitions; - -use crate::default_map::MapWithDefault; -use crate::unique_table::UniqueSeqTable; - -/// Emit code for matching an instruction predicate against an `InstructionData` reference called -/// `inst`. -/// -/// The generated code is an `if let` pattern match that falls through if the instruction has an -/// unexpected format. This should lead to a panic. 
-fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) { - if let Some(type_predicate) = instp.type_predicate("func") { - fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); - fmt.line(type_predicate); - return; - } - - let leaves = instp.collect_leaves(); - - let mut has_type_check = false; - let mut format_name = None; - let mut field_names = HashSet::new(); - - for leaf in leaves { - if leaf.is_type_predicate() { - has_type_check = true; - } else { - field_names.insert(leaf.format_destructuring_member_name()); - let leaf_format_name = leaf.format_name(); - match format_name { - None => format_name = Some(leaf_format_name), - Some(previous_format_name) => { - assert!( - previous_format_name == leaf_format_name, - "Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name - ); - } - } - } - } - - let mut fields = Vec::from_iter(field_names); - fields.sort(); - let fields = fields.join(", "); - - let format_name = format_name.expect("There should be a format name!"); - - fmtln!( - fmt, - "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{", - format_name, - fields - ); - fmt.indent(|fmt| { - if has_type_check { - // We could implement this. - assert!(has_func, "recipe predicates can't check type variables."); - fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); - } else if has_func { - // Silence dead argument. - fmt.line("let _ = func;"); - } - fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap()); - }); - fmtln!(fmt, "}"); - - fmt.line("unreachable!();"); -} - -/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES` -/// array indexed by recipe number. -/// -/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many -/// recipes have identical predicates. -fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) { - let mut predicate_names = HashMap::new(); - - fmt.comment(format!("{} recipe predicates.", isa.name)); - for recipe in isa.recipes.values() { - let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) { - (None, None) => continue, - (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue, - (isap, instp) => (isap, instp), - }; - - let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase()); - predicate_names.insert((isap, instp), func_name.clone()); - - // Generate the predicate function. - fmtln!( - fmt, - "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{", - func_name, - if isap.is_some() { "isap" } else { "_" }, - if instp.is_some() { "inst" } else { "_" } - ); - fmt.indent(|fmt| { - match (isap, instp) { - (Some(isap), None) => { - fmtln!(fmt, "isap.test({})", isap); - } - (None, Some(instp)) => { - emit_instp(instp, /* has func */ false, fmt); - } - (Some(isap), Some(instp)) => { - fmtln!(fmt, "isap.test({}) &&", isap); - emit_instp(instp, /* has func */ false, fmt); - } - _ => panic!("skipped above"), - } - }); - fmtln!(fmt, "}"); - } - fmt.empty_line(); - - // Generate the static table. - fmt.doc_comment(format!( - r#"{} recipe predicate table. 
- - One entry per recipe, set to Some only when the recipe is guarded by a predicate."#, - isa.name - )); - fmtln!( - fmt, - "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - match (&recipe.isa_predicate, &recipe.inst_predicate) { - (None, None) => fmt.line("None,"), - key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()), - } - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit private functions for matching instruction predicates as well as a static -/// `INST_PREDICATES` array indexed by predicate number. -fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.comment(format!("{} instruction predicates.", isa.name)); - for (id, instp) in isa.encodings_predicates.iter() { - fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index()); - fmt.indent(|fmt| { - emit_instp(instp, /* has func */ true, fmt); - }); - fmtln!(fmt, "}"); - } - fmt.empty_line(); - - // Generate the static table. - fmt.doc_comment(format!( - r#"{} instruction predicate table. - - One entry per instruction predicate, so the encoding bytecode can embed indexes into this - table."#, - isa.name - )); - fmtln!( - fmt, - "pub static INST_PREDICATES: [InstPredicate; {}] = [", - isa.encodings_predicates.len() - ); - fmt.indent(|fmt| { - for id in isa.encodings_predicates.keys() { - fmtln!(fmt, "inst_predicate_{},", id.index()); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit a table of encoding recipe names keyed by recipe number. -/// -/// This is used for pretty-printing encodings. -fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe names, using the same recipe index spaces as the one specified by the - corresponding binemit file."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_NAMES: [&str; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - fmtln!(fmt, r#""{}","#, recipe.name); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Returns a set of all the registers involved in fixed register constraints. -fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet { - HashSet::from_iter( - operands_in - .iter() - .map(|constraint| { - if let OperandConstraint::FixedReg(reg) = &constraint { - Some(*reg) - } else { - None - } - }) - .filter(|opt| opt.is_some()) - .map(|opt| opt.unwrap()), - ) -} - -/// Emit a struct field initializer for an array of operand constraints. -/// -/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on -/// inputs, fixed_registers must contain the fixed output registers). 
-fn emit_operand_constraints( - registers: &IsaRegs, - recipe: &EncodingRecipe, - constraints: &[OperandConstraint], - field_name: &'static str, - tied_operands: &HashMap, - fixed_registers: &HashSet, - fmt: &mut Formatter, -) { - if constraints.is_empty() { - fmtln!(fmt, "{}: &[],", field_name); - return; - } - - fmtln!(fmt, "{}: &[", field_name); - fmt.indent(|fmt| { - for (n, constraint) in constraints.iter().enumerate() { - fmt.line("OperandConstraint {"); - fmt.indent(|fmt| { - match constraint { - OperandConstraint::RegClass(reg_class) => { - if let Some(tied_input) = tied_operands.get(&n) { - fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); - } else { - fmt.line("kind: ConstraintKind::Reg,"); - } - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[*reg_class].name - ); - } - OperandConstraint::FixedReg(reg) => { - assert!(!tied_operands.contains_key(&n), "can't tie fixed registers"); - let constraint_kind = if fixed_registers.contains(®) { - "FixedTied" - } else { - "FixedReg" - }; - fmtln!( - fmt, - "kind: ConstraintKind::{}({}),", - constraint_kind, - reg.unit - ); - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[reg.regclass].name - ); - } - OperandConstraint::TiedInput(tied_input) => { - // This is a tied output constraint. It should never happen - // for input constraints. - assert!( - tied_input == tied_operands.get(&n).unwrap(), - "invalid tied constraint" - ); - fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); - - let tied_class = if let OperandConstraint::RegClass(tied_class) = - recipe.operands_in[*tied_input] - { - tied_class - } else { - panic!("tied constraints relate only to register inputs"); - }; - - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[tied_class].name - ); - } - OperandConstraint::Stack(stack) => { - assert!(!tied_operands.contains_key(&n), "can't tie stack operand"); - fmt.line("kind: ConstraintKind::Stack,"); - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[stack.regclass].name - ); - } - } - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "],"); -} - -/// Emit a table of encoding recipe operand constraints keyed by recipe number. -/// -/// These are used by the register allocator to pick registers that can be properly encoded. -fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe constraints list, using the same recipe index spaces as the one - specified by the corresponding binemit file. These constraints are used by register - allocation to select the right location to use for input and output values."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - // Compute a mapping of tied operands in both directions (input tied to outputs and - // conversely). - let mut tied_in_to_out = HashMap::new(); - let mut tied_out_to_in = HashMap::new(); - for (out_index, constraint) in recipe.operands_out.iter().enumerate() { - if let OperandConstraint::TiedInput(in_index) = &constraint { - tied_in_to_out.insert(*in_index, out_index); - tied_out_to_in.insert(out_index, *in_index); - } - } - - // Find the sets of registers involved in fixed register constraints. 
- let fixed_inputs = get_fixed_registers(&recipe.operands_in); - let fixed_outputs = get_fixed_registers(&recipe.operands_out); - - fmt.comment(format!("Constraints for recipe {}:", recipe.name)); - fmt.line("RecipeConstraints {"); - fmt.indent(|fmt| { - emit_operand_constraints( - &isa.regs, - recipe, - &recipe.operands_in, - "ins", - &tied_in_to_out, - &fixed_outputs, - fmt, - ); - emit_operand_constraints( - &isa.regs, - recipe, - &recipe.operands_out, - "outs", - &tied_out_to_in, - &fixed_inputs, - fmt, - ); - fmtln!( - fmt, - "fixed_ins: {},", - if !fixed_inputs.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "fixed_outs: {},", - if !fixed_outputs.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "tied_ops: {},", - if !tied_in_to_out.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "clobbers_flags: {},", - if recipe.clobbers_flags { - "true" - } else { - "false" - } - ); - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit a table of encoding recipe code size information. -fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe sizing descriptors, using the same recipe index spaces as the one - specified by the corresponding binemit file. These are used to compute the final size of an - instruction, as well as to compute the range of branches."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_SIZING: [RecipeSizing; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - fmt.comment(format!("Code size information for recipe {}:", recipe.name)); - fmt.line("RecipeSizing {"); - fmt.indent(|fmt| { - fmtln!(fmt, "base_size: {},", recipe.base_size); - fmtln!(fmt, "compute_size: {},", recipe.compute_size); - if let Some(range) = &recipe.branch_range { - fmtln!( - fmt, - "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),", - range.inst_size, - range.range - ); - } else { - fmt.line("branch_range: None,"); - } - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Level 1 table mapping types to `Level2` objects. -struct Level1Table<'cpu_mode> { - cpu_mode: &'cpu_mode CpuMode, - legalize_code: TransformGroupIndex, - - table_map: HashMap, usize>, - table_vec: Vec, -} - -impl<'cpu_mode> Level1Table<'cpu_mode> { - fn new(cpu_mode: &'cpu_mode CpuMode) -> Self { - Self { - cpu_mode, - legalize_code: cpu_mode.get_default_legalize_code(), - table_map: HashMap::new(), - table_vec: Vec::new(), - } - } - - /// Returns the level2 table for the given type; None means monomorphic, in this context. - fn l2table_for(&mut self, typ: Option) -> &mut Level2Table { - let cpu_mode = &self.cpu_mode; - let index = match self.table_map.get(&typ) { - Some(&index) => index, - None => { - let legalize_code = cpu_mode.get_legalize_code_for(&typ); - let table = Level2Table::new(typ.clone(), legalize_code); - let index = self.table_vec.len(); - self.table_map.insert(typ, index); - self.table_vec.push(table); - index - } - }; - self.table_vec.get_mut(index).unwrap() - } - - fn l2tables(&mut self) -> Vec<&mut Level2Table> { - self.table_vec - .iter_mut() - .filter(|table| !table.is_empty()) - .collect::>() - } -} - -struct Level2HashTableEntry { - inst_name: String, - offset: usize, -} - -/// Level 2 table mapping instruction opcodes to `EncList` objects. -/// -/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`. 
-struct Level2Table { - typ: Option, - legalize_code: TransformGroupIndex, - inst_to_encodings: BTreeMap, - hash_table_offset: Option, - hash_table_len: Option, -} - -impl Level2Table { - fn new(typ: Option, legalize_code: TransformGroupIndex) -> Self { - Self { - typ, - legalize_code, - inst_to_encodings: BTreeMap::new(), - hash_table_offset: None, - hash_table_len: None, - } - } - - fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList { - let copied_typ = self.typ.clone(); - self.inst_to_encodings - .entry(inst.name.clone()) - .or_insert_with(|| EncodingList::new(inst, copied_typ)) - } - - fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> { - self.inst_to_encodings.values_mut() - } - - fn is_empty(&self) -> bool { - self.inst_to_encodings.is_empty() - } - - fn layout_hashtable( - &mut self, - level2_hashtables: &mut Vec>, - level2_doc: &mut HashMap>, - ) { - let hash_table = generate_table( - self.inst_to_encodings.values(), - self.inst_to_encodings.len(), - // TODO the Python code wanted opcode numbers to start from 1. - |enc_list| enc_list.inst.opcode_number.index() + 1, - ); - - let hash_table_offset = level2_hashtables.len(); - let hash_table_len = hash_table.len(); - - assert!(self.hash_table_offset.is_none()); - assert!(self.hash_table_len.is_none()); - self.hash_table_offset = Some(hash_table_offset); - self.hash_table_len = Some(hash_table_len); - - level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| { - opt_enc_list.map(|enc_list| Level2HashTableEntry { - inst_name: enc_list.inst.camel_name.clone(), - offset: enc_list.offset.unwrap(), - }) - })); - - let typ_comment = match &self.typ { - Some(ty) => ty.to_string(), - None => "typeless".into(), - }; - - level2_doc.get_or_default(hash_table_offset).push(format!( - "{:06x}: {}, {} entries", - hash_table_offset, typ_comment, hash_table_len - )); - } -} - -/// The u16 values in an encoding list entry are interpreted as follows: -/// -/// NR = len(all_recipes) -/// -/// entry < 2*NR -/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. -/// If bit 0 is set, stop with the default legalization code. -/// If bit 0 is clear, keep going down the list. -/// entry < PRED_START -/// Stop with legalization code `entry - 2*NR`. -/// -/// Remaining entries are interpreted as (skip, pred) pairs, where: -/// -/// skip = (entry - PRED_START) >> PRED_BITS -/// pred = (entry - PRED_START) & PRED_MASK -/// -/// If the predicate is satisfied, keep going. Otherwise skip over the next -/// `skip` entries. If skip == 0, stop with the default legalization code. -/// -/// The `pred` predicate number is interpreted as an instruction predicate if it -/// is in range, otherwise an ISA predicate. - -/// Encoding lists are represented as u16 arrays. -const CODE_BITS: usize = 16; - -/// Beginning of the predicate code words. -const PRED_START: u16 = 0x1000; - -/// Number of bits used to hold a predicate number (instruction + ISA predicates). -const PRED_BITS: usize = 12; - -/// Mask for extracting the predicate number. -const PRED_MASK: usize = (1 << PRED_BITS) - 1; - -/// Encoder for the list format above. -struct Encoder { - num_instruction_predicates: usize, - - /// u16 encoding list words. - words: Vec, - - /// Documentation comments: Index into `words` + comment. 
- docs: Vec<(usize, String)>, -} - -impl Encoder { - fn new(num_instruction_predicates: usize) -> Self { - Self { - num_instruction_predicates, - words: Vec::new(), - docs: Vec::new(), - } - } - - /// Add a recipe+bits entry to the list. - fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) { - let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16; - assert!(code < PRED_START); - - let doc = format!( - "--> {}{}", - enc.to_rust_comment(recipes), - if is_final { " and stop" } else { "" } - ); - self.docs.push((self.words.len(), doc)); - - self.words.push(code); - self.words.push(enc.encbits); - } - - /// Add a predicate entry. - fn pred(&mut self, pred_comment: String, skip: usize, n: usize) { - assert!(n <= PRED_MASK); - let entry = (PRED_START as usize) + (n | (skip << PRED_BITS)); - assert!(entry < (1 << CODE_BITS)); - let entry = entry as u16; - - let doc = if skip == 0 { - "stop".to_string() - } else { - format!("skip {}", skip) - }; - let doc = format!("{} unless {}", doc, pred_comment); - - self.docs.push((self.words.len(), doc)); - self.words.push(entry); - } - - /// Add an instruction predicate entry. - fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) { - let number = pred.index(); - let pred_comment = format!("inst_predicate_{}", number); - self.pred(pred_comment, skip, number); - } - - /// Add an ISA predicate entry. - fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) { - // ISA predicates follow the instruction predicates. - let n = self.num_instruction_predicates + (pred as usize); - let pred_comment = format!("PredicateView({})", pred); - self.pred(pred_comment, skip, n); - } -} - -/// List of instructions for encoding a given type + opcode pair. -/// -/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16 -/// values. -struct EncodingList { - inst: Instruction, - typ: Option, - encodings: Vec, - offset: Option, -} - -impl EncodingList { - fn new(inst: &Instruction, typ: Option) -> Self { - Self { - inst: inst.clone(), - typ, - encodings: Default::default(), - offset: None, - } - } - - /// Encode this list as a sequence of u16 numbers. - /// - /// Adds the sequence to `enc_lists` and records the returned offset as - /// `self.offset`. - /// - /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets. - fn encode( - &mut self, - isa: &TargetIsa, - cpu_mode: &CpuMode, - enc_lists: &mut UniqueSeqTable, - enc_lists_doc: &mut HashMap>, - ) { - assert!(!self.encodings.is_empty()); - - let mut encoder = Encoder::new(isa.encodings_predicates.len()); - - let mut index = 0; - while index < self.encodings.len() { - let encoding = &self.encodings[index]; - - // Try to see how many encodings are following and have the same ISA predicate and - // instruction predicate, so as to reduce the number of tests carried out by the - // encoding list interpreter.. - // - // Encodings with similar tests are hereby called a group. The group includes the - // current encoding we're looking at. 
- let (isa_predicate, inst_predicate) = - (&encoding.isa_predicate, &encoding.inst_predicate); - - let group_size = { - let mut group_size = 1; - while index + group_size < self.encodings.len() { - let next_encoding = &self.encodings[index + group_size]; - if &next_encoding.inst_predicate != inst_predicate - || &next_encoding.isa_predicate != isa_predicate - { - break; - } - group_size += 1; - } - group_size - }; - - let is_last_group = index + group_size == self.encodings.len(); - - // The number of entries to skip when a predicate isn't satisfied is the size of both - // predicates + the size of the group, minus one (for this predicate). Each recipe - // entry has a size of two u16 (recipe index + bits). - let mut skip = if is_last_group { - 0 - } else { - let isap_size = match isa_predicate { - Some(_) => 1, - None => 0, - }; - let instp_size = match inst_predicate { - Some(_) => 1, - None => 0, - }; - isap_size + instp_size + group_size * 2 - 1 - }; - - if let Some(pred) = isa_predicate { - encoder.isa_predicate(*pred, skip); - if !is_last_group { - skip -= 1; - } - } - - if let Some(pred) = inst_predicate { - encoder.inst_predicate(*pred, skip); - // No need to update skip, it's dead after this point. - } - - for i in 0..group_size { - let encoding = &self.encodings[index + i]; - let is_last_encoding = index + i == self.encodings.len() - 1; - encoder.recipe(&isa.recipes, encoding, is_last_encoding); - } - - index += group_size; - } - - assert!(self.offset.is_none()); - let offset = enc_lists.add(&encoder.words); - self.offset = Some(offset); - - // Doc comments. - let recipe_typ_mode_name = format!( - "{}{} ({})", - self.inst.name, - if let Some(typ) = &self.typ { - format!(".{}", typ.to_string()) - } else { - "".into() - }, - cpu_mode.name - ); - - enc_lists_doc - .get_or_default(offset) - .push(format!("{:06x}: {}", offset, recipe_typ_mode_name)); - for (pos, doc) in encoder.docs { - enc_lists_doc.get_or_default(offset + pos).push(doc); - } - enc_lists_doc - .get_or_default(offset + encoder.words.len()) - .insert(0, format!("end of {}", recipe_typ_mode_name)); - } -} - -fn make_tables(cpu_mode: &CpuMode) -> Level1Table { - let mut table = Level1Table::new(cpu_mode); - - for encoding in &cpu_mode.encodings { - table - .l2table_for(encoding.bound_type.clone()) - .enclist_for(encoding.inst()) - .encodings - .push(encoding.clone()); - } - - // Ensure there are level 1 table entries for all types with a custom legalize action. - for value_type in cpu_mode.get_legalized_types() { - table.l2table_for(Some(value_type.clone())); - } - // ... and also for monomorphic instructions. - table.l2table_for(None); - - table -} - -/// Compute encodings and doc comments for encoding lists in `level1`. -fn encode_enclists( - isa: &TargetIsa, - cpu_mode: &CpuMode, - level1: &mut Level1Table, - enc_lists: &mut UniqueSeqTable, - enc_lists_doc: &mut HashMap>, -) { - for level2 in level1.l2tables() { - for enclist in level2.enclists() { - enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc); - } - } -} - -fn encode_level2_hashtables<'a>( - level1: &'a mut Level1Table, - level2_hashtables: &mut Vec>, - level2_doc: &mut HashMap>, -) { - for level2 in level1.l2tables() { - level2.layout_hashtable(level2_hashtables, level2_doc); - } -} - -fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { - // Level 1 tables, one per CPU mode. - let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new(); - - // Single table containing all the level2 hash tables. 
- let mut level2_hashtables = Vec::new(); - let mut level2_doc: HashMap> = HashMap::new(); - - // Tables for encoding lists with comments. - let mut enc_lists = UniqueSeqTable::new(); - let mut enc_lists_doc = HashMap::new(); - - for cpu_mode in &isa.cpu_modes { - level2_doc - .get_or_default(level2_hashtables.len()) - .push(cpu_mode.name.into()); - - let mut level1 = make_tables(cpu_mode); - - encode_enclists( - isa, - cpu_mode, - &mut level1, - &mut enc_lists, - &mut enc_lists_doc, - ); - encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc); - - level1_tables.insert(cpu_mode.name, level1); - } - - // Compute an appropriate Rust integer type to use for offsets into a table of the given length. - let offset_type = |length: usize| { - if length <= 0x10000 { - "u16" - } else { - assert!(u32::try_from(length).is_ok(), "table too big!"); - "u32" - } - }; - - let level1_offset_type = offset_type(level2_hashtables.len()); - let level2_offset_type = offset_type(enc_lists.len()); - - // Emit encoding lists. - fmt.doc_comment( - format!(r#"{} encoding lists. - - This contains the entire encodings bytecode for every single instruction; the encodings - interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2 - table entries below."#, isa.name) - ); - fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len()); - fmt.indent(|fmt| { - let mut line = Vec::new(); - for (index, entry) in enc_lists.iter().enumerate() { - if let Some(comments) = enc_lists_doc.get(&index) { - if !line.is_empty() { - fmtln!(fmt, "{},", line.join(", ")); - line.clear(); - } - for comment in comments { - fmt.comment(comment); - } - } - line.push(format!("{:#06x}", entry)); - } - if !line.is_empty() { - fmtln!(fmt, "{},", line.join(", ")); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - - // Emit the full concatenation of level 2 hash tables. - fmt.doc_comment(format!( - r#"{} level 2 hash tables. - - This hash table, keyed by instruction opcode, contains all the starting offsets for the - encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the - instruction's controlling type in the level 1 hash table."#, - isa.name - )); - fmtln!( - fmt, - "pub static LEVEL2: [Level2Entry<{}>; {}] = [", - level2_offset_type, - level2_hashtables.len() - ); - fmt.indent(|fmt| { - for (offset, entry) in level2_hashtables.iter().enumerate() { - if let Some(comments) = level2_doc.get(&offset) { - for comment in comments { - fmt.comment(comment); - } - } - if let Some(entry) = entry { - fmtln!( - fmt, - "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},", - entry.inst_name, - entry.offset - ); - } else { - fmt.line("Level2Entry { opcode: None, offset: 0 },"); - } - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - - // Emit a level 1 hash table for each CPU mode. - for cpu_mode in &isa.cpu_modes { - let level1 = &level1_tables.get(cpu_mode.name).unwrap(); - let hash_table = generate_table( - level1.table_vec.iter(), - level1.table_vec.len(), - |level2_table| { - if let Some(typ) = &level2_table.typ { - typ.number().expect("type without a number") as usize - } else { - 0 - } - }, - ); - - fmt.doc_comment(format!( - r#"{} level 1 hash table for the CPU mode {}. 
- - This hash table, keyed by instruction controlling type, contains all the level 2 - hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating - which legalization scheme to apply when the instruction doesn't have any valid encoding for - this CPU mode. - "#, - isa.name, cpu_mode.name - )); - fmtln!( - fmt, - "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [", - cpu_mode.name.to_uppercase(), - level1_offset_type, - hash_table.len() - ); - fmt.indent(|fmt| { - for opt_level2 in hash_table { - let level2 = match opt_level2 { - None => { - // Empty hash table entry. Include the default legalization action. - fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},", - isa.translate_group_index(level1.legalize_code)); - continue; - } - Some(level2) => level2, - }; - - let legalize_comment = defs.transform_groups.get(level2.legalize_code).name; - let legalize_code = isa.translate_group_index(level2.legalize_code); - - let typ_name = if let Some(typ) = &level2.typ { - typ.rust_name() - } else { - "ir::types::INVALID".into() - }; - - if level2.is_empty() { - // Empty level 2 table: Only a specialized legalization action, no actual - // table. - // Set an offset that is out of bounds, but make sure it doesn't overflow its - // type when adding `1< 0, "Level2 hash table was too small."); - fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}", - typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - } -} - -fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { - // Make the `RECIPE_PREDICATES` table. - emit_recipe_predicates(isa, fmt); - - // Make the `INST_PREDICATES` table. - emit_inst_predicates(isa, fmt); - - emit_encoding_tables(defs, isa, fmt); - - emit_recipe_names(isa, fmt); - emit_recipe_constraints(isa, fmt); - emit_recipe_sizing(isa, fmt); - - // Finally, tie it all together in an `EncInfo`. 
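// [Editor's sketch, not part of the original patch.] The shape of the lookup
// that the generated LEVEL1_*, LEVEL2 and ENCLISTS tables fed: controlling
// type -> level 2 table -> opcode -> encoding-list offset. Field names are
// assumptions, and the linear scans stand in for the real hash-table probing.
struct L1Entry {
    ty_number: u32,
    log2len: u8,
    offset: usize,
    // Legalization scheme to apply when no encoding exists for this type.
    legalize: u16,
}
struct L2Entry {
    opcode: Option<u32>,
    offset: usize,
}

fn lookup_enclist_offset(
    level1: &[L1Entry],
    level2: &[L2Entry],
    ctrl_type_number: u32,
    opcode: u32,
) -> Option<usize> {
    // Level 1 is keyed by the instruction's controlling type.
    let l1 = level1.iter().find(|e| e.ty_number == ctrl_type_number)?;
    if l1.log2len == !0u8 {
        // Empty entry: fall back to the legalization action only.
        return None;
    }
    // Level 2 is a 2^log2len slice keyed by opcode.
    let len = 1usize << l1.log2len;
    level2[l1.offset..l1.offset + len]
        .iter()
        .find(|e| e.opcode == Some(opcode))
        .map(|e| e.offset)
}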
- fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); - fmt.indent(|fmt| { - fmt.line("constraints: &RECIPE_CONSTRAINTS,"); - fmt.line("sizing: &RECIPE_SIZING,"); - fmt.line("names: &RECIPE_NAMES,"); - }); - fmt.line("};"); -} - -pub(crate) fn generate( - defs: &SharedDefinitions, - isa: &TargetIsa, - filename: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(defs, isa, &mut fmt); - fmt.update_file(filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 88f7ed6b09..bfa1cd0056 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -7,8 +7,6 @@ mod srcgen; pub mod error; pub mod isa; -mod gen_binemit; -mod gen_encodings; mod gen_inst; mod gen_legalizer; mod gen_registers; @@ -81,20 +79,6 @@ pub fn generate( &format!("settings-{}.rs", isa.name), &out_dir, )?; - - gen_encodings::generate( - &shared_defs, - &isa, - &format!("encoding-{}.rs", isa.name), - &out_dir, - )?; - - gen_binemit::generate( - &isa.name, - &isa.recipes, - &format!("binemit-{}.rs", isa.name), - &out_dir, - )?; } for isa in new_backend_isas { From 18bd27e90bbe92f0491e58510cc568b642987311 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 21 Jun 2021 12:55:08 +0200 Subject: [PATCH 05/14] Remove legalizer support from cranelift-codegen-meta --- cranelift/codegen/meta/src/cdsl/ast.rs | 751 ------------ cranelift/codegen/meta/src/cdsl/cpu_modes.rs | 30 - .../codegen/meta/src/cdsl/instructions.rs | 259 +--- cranelift/codegen/meta/src/cdsl/isa.rs | 73 +- cranelift/codegen/meta/src/cdsl/mod.rs | 5 - cranelift/codegen/meta/src/cdsl/recipes.rs | 165 --- .../codegen/meta/src/cdsl/type_inference.rs | 653 +--------- cranelift/codegen/meta/src/cdsl/typevar.rs | 290 +---- cranelift/codegen/meta/src/cdsl/xform.rs | 478 -------- cranelift/codegen/meta/src/gen_legalizer.rs | 734 ----------- cranelift/codegen/meta/src/isa/arm32/mod.rs | 19 +- cranelift/codegen/meta/src/isa/arm64/mod.rs | 19 +- cranelift/codegen/meta/src/isa/s390x/mod.rs | 15 +- cranelift/codegen/meta/src/isa/x86/mod.rs | 15 +- cranelift/codegen/meta/src/lib.rs | 16 - .../codegen/meta/src/shared/instructions.rs | 6 +- cranelift/codegen/meta/src/shared/legalize.rs | 1087 ----------------- cranelift/codegen/meta/src/shared/mod.rs | 12 +- cranelift/codegen/meta/src/srcgen.rs | 15 - cranelift/codegen/meta/src/unique_table.rs | 3 - 20 files changed, 15 insertions(+), 4630 deletions(-) delete mode 100644 cranelift/codegen/meta/src/cdsl/ast.rs delete mode 100644 cranelift/codegen/meta/src/cdsl/cpu_modes.rs delete mode 100644 cranelift/codegen/meta/src/cdsl/recipes.rs delete mode 100644 cranelift/codegen/meta/src/cdsl/xform.rs delete mode 100644 cranelift/codegen/meta/src/gen_legalizer.rs delete mode 100644 cranelift/codegen/meta/src/shared/legalize.rs diff --git a/cranelift/codegen/meta/src/cdsl/ast.rs b/cranelift/codegen/meta/src/cdsl/ast.rs deleted file mode 100644 index f4f2afe273..0000000000 --- a/cranelift/codegen/meta/src/cdsl/ast.rs +++ /dev/null @@ -1,751 +0,0 @@ -use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate}; -use crate::cdsl::operands::{OperandKind, OperandKindFields}; -use crate::cdsl::types::ValueType; -use crate::cdsl::typevar::{TypeSetBuilder, TypeVar}; - -use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue}; - -use std::fmt; -use std::iter::IntoIterator; - -pub(crate) enum Expr { - Var(VarIndex), - Literal(Literal), -} - -impl Expr { - pub fn 
maybe_literal(&self) -> Option<&Literal> { - match &self { - Expr::Literal(lit) => Some(lit), - _ => None, - } - } - - pub fn maybe_var(&self) -> Option { - if let Expr::Var(var) = &self { - Some(*var) - } else { - None - } - } - - pub fn unwrap_var(&self) -> VarIndex { - self.maybe_var() - .expect("tried to unwrap a non-Var content in Expr::unwrap_var") - } - - pub fn to_rust_code(&self, var_pool: &VarPool) -> String { - match self { - Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(), - Expr::Literal(literal) => literal.to_rust_code(), - } - } -} - -/// An AST definition associates a set of variables with the values produced by an expression. -pub(crate) struct Def { - pub apply: Apply, - pub defined_vars: Vec, -} - -impl Def { - pub fn to_comment_string(&self, var_pool: &VarPool) -> String { - let results = self - .defined_vars - .iter() - .map(|&x| var_pool.get(x).name.as_str()) - .collect::>(); - - let results = if results.len() == 1 { - results[0].to_string() - } else { - format!("({})", results.join(", ")) - }; - - format!("{} := {}", results, self.apply.to_comment_string(var_pool)) - } -} - -pub(crate) struct DefPool { - pool: PrimaryMap, -} - -impl DefPool { - pub fn new() -> Self { - Self { - pool: PrimaryMap::new(), - } - } - pub fn get(&self, index: DefIndex) -> &Def { - self.pool.get(index).unwrap() - } - pub fn next_index(&self) -> DefIndex { - self.pool.next_key() - } - pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec) -> DefIndex { - self.pool.push(Def { - apply, - defined_vars, - }) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct DefIndex(u32); -entity_impl!(DefIndex); - -/// A definition which would lead to generate a block creation. -#[derive(Clone)] -pub(crate) struct Block { - /// Instruction index after which the block entry is set. - pub location: DefIndex, - /// Variable holding the new created block. - pub name: VarIndex, -} - -pub(crate) struct BlockPool { - pool: SparseMap, -} - -impl SparseMapValue for Block { - fn key(&self) -> DefIndex { - self.location - } -} - -impl BlockPool { - pub fn new() -> Self { - Self { - pool: SparseMap::new(), - } - } - pub fn get(&self, index: DefIndex) -> Option<&Block> { - self.pool.get(index) - } - pub fn create_block(&mut self, name: VarIndex, location: DefIndex) { - if self.pool.contains_key(location) { - panic!("Attempt to insert 2 blocks after the same instruction") - } - self.pool.insert(Block { location, name }); - } - pub fn is_empty(&self) -> bool { - self.pool.is_empty() - } -} - -// Implement IntoIterator such that we can iterate over blocks which are in the block pool. -impl<'a> IntoIterator for &'a BlockPool { - type Item = <&'a SparseMap as IntoIterator>::Item; - type IntoIter = <&'a SparseMap as IntoIterator>::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.pool.into_iter() - } -} - -#[derive(Clone, Debug)] -pub(crate) enum Literal { - /// A value of an enumerated immediate operand. - /// - /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values - /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one - /// of the values. - Enumerator { - rust_type: &'static str, - value: &'static str, - }, - - /// A bitwise value of an immediate operand, used for bitwise exact floating point constants. - Bits { rust_type: &'static str, value: u64 }, - - /// A value of an integer immediate operand. - Int(i64), - - /// A empty list of variable set of arguments. 
- EmptyVarArgs, -} - -impl Literal { - pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self { - let value = match &kind.fields { - OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| { - panic!( - "nonexistent value '{}' in enumeration '{}'", - value, kind.rust_type - ) - }), - _ => panic!("enumerator is for enum values"), - }; - Literal::Enumerator { - rust_type: kind.rust_type, - value, - } - } - - pub fn bits(kind: &OperandKind, bits: u64) -> Self { - match kind.fields { - OperandKindFields::ImmValue => {} - _ => panic!("bits_of is for immediate scalar types"), - } - Literal::Bits { - rust_type: kind.rust_type, - value: bits, - } - } - - pub fn constant(kind: &OperandKind, value: i64) -> Self { - match kind.fields { - OperandKindFields::ImmValue => {} - _ => panic!("constant is for immediate scalar types"), - } - Literal::Int(value) - } - - pub fn empty_vararg() -> Self { - Literal::EmptyVarArgs - } - - pub fn to_rust_code(&self) -> String { - match self { - Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value), - Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value), - Literal::Int(val) => val.to_string(), - Literal::EmptyVarArgs => "&[]".into(), - } - } -} - -#[derive(Clone, Copy, Debug)] -pub(crate) enum PatternPosition { - Source, - Destination, -} - -/// A free variable. -/// -/// When variables are used in `XForms` with source and destination patterns, they are classified -/// as follows: -/// -/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in -/// the destination pattern too, but no new inputs can be introduced. -/// -/// Output values: Variables that are defined in both the source and destination pattern. These -/// values may have uses outside the source pattern, and the destination pattern must compute the -/// same value. -/// -/// Intermediate values: Values that are defined in the source pattern, but not in the destination -/// pattern. These may have uses outside the source pattern, so the defining instruction can't be -/// deleted immediately. -/// -/// Temporary values are defined only in the destination pattern. -pub(crate) struct Var { - pub name: String, - - /// The `Def` defining this variable in a source pattern. - pub src_def: Option, - - /// The `Def` defining this variable in a destination pattern. - pub dst_def: Option, - - /// TypeVar representing the type of this variable. - type_var: Option, - - /// Is this the original type variable, or has it be redefined with set_typevar? - is_original_type_var: bool, -} - -impl Var { - fn new(name: String) -> Self { - Self { - name, - src_def: None, - dst_def: None, - type_var: None, - is_original_type_var: false, - } - } - - /// Is this an input value to the src pattern? - pub fn is_input(&self) -> bool { - self.src_def.is_none() && self.dst_def.is_none() - } - - /// Is this an output value, defined in both src and dst patterns? - pub fn is_output(&self) -> bool { - self.src_def.is_some() && self.dst_def.is_some() - } - - /// Is this an intermediate value, defined only in the src pattern? - pub fn is_intermediate(&self) -> bool { - self.src_def.is_some() && self.dst_def.is_none() - } - - /// Is this a temp value, defined only in the dst pattern? - pub fn is_temp(&self) -> bool { - self.src_def.is_none() && self.dst_def.is_some() - } - - /// Get the def of this variable according to the position. 
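// [Editor's sketch, not part of the original patch.] What `Literal::to_rust_code`
// above renders for each variant; the `ieee32` operand-kind name and bit
// pattern are illustrative assumptions only.
fn literal_rendering_examples() {
    assert_eq!(Literal::Int(42).to_rust_code(), "42");
    let float_bits = Literal::Bits { rust_type: "ieee32", value: 0x7f80_0000 };
    assert_eq!(float_bits.to_rust_code(), "ieee32::with_bits(0x7f800000)");
    assert_eq!(Literal::EmptyVarArgs.to_rust_code(), "&[]");
}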
- pub fn get_def(&self, position: PatternPosition) -> Option { - match position { - PatternPosition::Source => self.src_def, - PatternPosition::Destination => self.dst_def, - } - } - - pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) { - assert!( - self.get_def(position).is_none(), - "redefinition of variable {}", - self.name - ); - match position { - PatternPosition::Source => { - self.src_def = Some(def); - } - PatternPosition::Destination => { - self.dst_def = Some(def); - } - } - } - - /// Get the type variable representing the type of this variable. - pub fn get_or_create_typevar(&mut self) -> TypeVar { - match &self.type_var { - Some(tv) => tv.clone(), - None => { - // Create a new type var in which we allow all types. - let tv = TypeVar::new( - format!("typeof_{}", self.name), - format!("Type of the pattern variable {:?}", self), - TypeSetBuilder::all(), - ); - self.type_var = Some(tv.clone()); - self.is_original_type_var = true; - tv - } - } - } - pub fn get_typevar(&self) -> Option { - self.type_var.clone() - } - pub fn set_typevar(&mut self, tv: TypeVar) { - self.is_original_type_var = if let Some(previous_tv) = &self.type_var { - *previous_tv == tv - } else { - false - }; - self.type_var = Some(tv); - } - - /// Check if this variable has a free type variable. If not, the type of this variable is - /// computed from the type of another variable. - pub fn has_free_typevar(&self) -> bool { - match &self.type_var { - Some(tv) => tv.base.is_none() && self.is_original_type_var, - None => false, - } - } - - pub fn to_rust_code(&self) -> String { - self.name.clone() - } - fn rust_type(&self) -> String { - self.type_var.as_ref().unwrap().to_rust_code() - } -} - -impl fmt::Debug for Var { - fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { - fmt.write_fmt(format_args!( - "Var({}{}{})", - self.name, - if self.src_def.is_some() { ", src" } else { "" }, - if self.dst_def.is_some() { ", dst" } else { "" } - )) - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct VarIndex(u32); -entity_impl!(VarIndex); - -pub(crate) struct VarPool { - pool: PrimaryMap, -} - -impl VarPool { - pub fn new() -> Self { - Self { - pool: PrimaryMap::new(), - } - } - pub fn get(&self, index: VarIndex) -> &Var { - self.pool.get(index).unwrap() - } - pub fn get_mut(&mut self, index: VarIndex) -> &mut Var { - self.pool.get_mut(index).unwrap() - } - pub fn create(&mut self, name: impl Into) -> VarIndex { - self.pool.push(Var::new(name.into())) - } -} - -/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when -/// the legalizer code is generated. The constant data is named in the order it is inserted; -/// inserting data using [insert] will avoid duplicates. -/// -/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html -/// [insert]: ConstPool::insert -pub(crate) struct ConstPool { - pool: Vec>, -} - -impl ConstPool { - /// Create an empty constant pool. - pub fn new() -> Self { - Self { pool: vec![] } - } - - /// Create a name for a constant from its position in the pool. - fn create_name(position: usize) -> String { - format!("const{}", position) - } - - /// Insert constant data into the pool, returning the name of the variable used to reference it. - /// This method will search for data that matches the new data and return the existing constant - /// name to avoid duplicates. 
- pub fn insert(&mut self, data: Vec) -> String { - let possible_position = self.pool.iter().position(|d| d == &data); - let position = if let Some(found_position) = possible_position { - found_position - } else { - let new_position = self.pool.len(); - self.pool.push(data); - new_position - }; - ConstPool::create_name(position) - } - - /// Iterate over the name/value pairs in the pool. - pub fn iter(&self) -> impl Iterator)> { - self.pool - .iter() - .enumerate() - .map(|(i, v)| (ConstPool::create_name(i), v)) - } -} - -/// Apply an instruction to arguments. -/// -/// An `Apply` AST expression is created by using function call syntax on instructions. This -/// applies to both bound and unbound polymorphic instructions. -pub(crate) struct Apply { - pub inst: Instruction, - pub args: Vec, - pub value_types: Vec, -} - -impl Apply { - pub fn new(target: InstSpec, args: Vec) -> Self { - let (inst, value_types) = match target { - InstSpec::Inst(inst) => (inst, Vec::new()), - InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), - }; - - // Apply should only operate on concrete value types, not "any". - let value_types = value_types - .into_iter() - .map(|vt| vt.expect()) - .collect(); - - // Basic check on number of arguments. - assert!( - inst.operands_in.len() == args.len(), - "incorrect number of arguments in instruction {}", - inst.name - ); - - // Check that the kinds of Literals arguments match the expected operand. - for &imm_index in &inst.imm_opnums { - let arg = &args[imm_index]; - if let Some(literal) = arg.maybe_literal() { - let op = &inst.operands_in[imm_index]; - match &op.kind.fields { - OperandKindFields::ImmEnum(values) => { - if let Literal::Enumerator { value, .. } = literal { - assert!( - values.iter().any(|(_key, v)| v == value), - "Nonexistent enum value '{}' passed to field of kind '{}' -- \ - did you use the right enum?", - value, - op.kind.rust_type - ); - } else { - panic!( - "Passed non-enum field value {:?} to field of kind {}", - literal, op.kind.rust_type - ); - } - } - OperandKindFields::ImmValue => match &literal { - Literal::Enumerator { value, .. } => panic!( - "Expected immediate value in immediate field of kind '{}', \ - obtained enum value '{}'", - op.kind.rust_type, value - ), - Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {} - }, - _ => { - panic!( - "Literal passed to non-literal field of kind {}", - op.kind.rust_type - ); - } - } - } - } - - Self { - inst, - args, - value_types, - } - } - - fn to_comment_string(&self, var_pool: &VarPool) -> String { - let args = self - .args - .iter() - .map(|arg| arg.to_rust_code(var_pool)) - .collect::>() - .join(", "); - - let mut inst_and_bound_types = vec![self.inst.name.to_string()]; - inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string())); - let inst_name = inst_and_bound_types.join("."); - - format!("{}({})", inst_name, args) - } - - pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate { - let mut pred = InstructionPredicate::new(); - for (format_field, &op_num) in self - .inst - .format - .imm_fields - .iter() - .zip(self.inst.imm_opnums.iter()) - { - let arg = &self.args[op_num]; - if arg.maybe_var().is_some() { - // Ignore free variables for now. - continue; - } - pred = pred.and(InstructionPredicate::new_is_field_equal_ast( - &*self.inst.format, - format_field, - arg.to_rust_code(var_pool), - )); - } - - // Add checks for any bound secondary type variables. 
We can't check the controlling type - // variable this way since it may not appear as the type of an operand. - if self.value_types.len() > 1 { - let poly = self - .inst - .polymorphic_info - .as_ref() - .expect("must have polymorphic info if it has bounded types"); - for (bound_type, type_var) in - self.value_types[1..].iter().zip(poly.other_typevars.iter()) - { - pred = pred.and(InstructionPredicate::new_typevar_check( - &self.inst, type_var, bound_type, - )); - } - } - - pred - } - - /// Same as `inst_predicate()`, but also check the controlling type variable. - pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate { - let mut pred = self.inst_predicate(var_pool); - - if !self.value_types.is_empty() { - let bound_type = &self.value_types[0]; - let poly = self.inst.polymorphic_info.as_ref().unwrap(); - let type_check = if poly.use_typevar_operand { - InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type) - } else { - InstructionPredicate::new_ctrl_typevar_check(&bound_type) - }; - pred = pred.and(type_check); - } - - pred - } - - pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String { - let mut args = self - .args - .iter() - .map(|expr| expr.to_rust_code(var_pool)) - .collect::>() - .join(", "); - - // Do we need to pass an explicit type argument? - if let Some(poly) = &self.inst.polymorphic_info { - if !poly.use_typevar_operand { - args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args); - } - } - - format!("{}({})", self.inst.snake_name(), args) - } -} - -// Simple helpers for legalize actions construction. - -pub(crate) enum DummyExpr { - Var(DummyVar), - Literal(Literal), - Constant(DummyConstant), - Apply(InstSpec, Vec), - Block(DummyVar), -} - -#[derive(Clone)] -pub(crate) struct DummyVar { - pub name: String, -} - -impl Into for DummyVar { - fn into(self) -> DummyExpr { - DummyExpr::Var(self) - } -} -impl Into for Literal { - fn into(self) -> DummyExpr { - DummyExpr::Literal(self) - } -} - -#[derive(Clone)] -pub(crate) struct DummyConstant(pub(crate) Vec); - -impl Into for DummyConstant { - fn into(self) -> DummyExpr { - DummyExpr::Constant(self) - } -} - -pub(crate) fn var(name: &str) -> DummyVar { - DummyVar { - name: name.to_owned(), - } -} - -pub(crate) struct DummyDef { - pub expr: DummyExpr, - pub defined_vars: Vec, -} - -pub(crate) struct ExprBuilder { - expr: DummyExpr, -} - -impl ExprBuilder { - pub fn apply(inst: InstSpec, args: Vec) -> Self { - let expr = DummyExpr::Apply(inst, args); - Self { expr } - } - - pub fn assign_to(self, defined_vars: Vec) -> DummyDef { - DummyDef { - expr: self.expr, - defined_vars, - } - } - - pub fn block(name: DummyVar) -> Self { - let expr = DummyExpr::Block(name); - Self { expr } - } -} - -macro_rules! def_rhs { - // inst(a, b, c) - ($inst:ident($($src:expr),*)) => { - ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*]) - }; - - // inst.type(a, b, c) - ($inst:ident.$type:ident($($src:expr),*)) => { - ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*]) - }; -} - -// Helper macro to define legalization recipes. -macro_rules! def { - // x = ... - ($dest:ident = $($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(vec![$dest.clone()]) - }; - - // (x, y, ...) = ... - (($($dest:ident),*) = $($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*]) - }; - - // An instruction with no results. 
- ($($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(Vec::new()) - } -} - -// Helper macro to define legalization recipes. -macro_rules! block { - // a basic block definition, splitting the current block in 2. - ($block: ident) => { - ExprBuilder::block($block).assign_to(Vec::new()) - }; -} - -#[cfg(test)] -mod tests { - use crate::cdsl::ast::ConstPool; - - #[test] - fn const_pool_returns_var_names() { - let mut c = ConstPool::new(); - assert_eq!(c.insert([0, 1, 2].to_vec()), "const0"); - assert_eq!(c.insert([1, 2, 3].to_vec()), "const1"); - } - - #[test] - fn const_pool_avoids_duplicates() { - let data = [0, 1, 2].to_vec(); - let mut c = ConstPool::new(); - assert_eq!(c.pool.len(), 0); - - assert_eq!(c.insert(data.clone()), "const0"); - assert_eq!(c.pool.len(), 1); - - assert_eq!(c.insert(data), "const0"); - assert_eq!(c.pool.len(), 1); - } - - #[test] - fn const_pool_iterates() { - let mut c = ConstPool::new(); - c.insert([0, 1, 2].to_vec()); - c.insert([3, 4, 5].to_vec()); - - let mut iter = c.iter(); - assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2]))); - assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5]))); - assert_eq!(iter.next(), None); - } -} diff --git a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs deleted file mode 100644 index e42a27da2a..0000000000 --- a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; - -use crate::cdsl::types::ValueType; -use crate::cdsl::xform::TransformGroupIndex; - -pub(crate) struct CpuMode { - pub name: &'static str, - default_legalize: Option, - monomorphic_legalize: Option, - typed_legalize: HashMap, -} - -impl CpuMode { - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly - /// reachable set of TransformGroup this TargetIsa uses. - pub fn direct_transform_groups(&self) -> Vec { - let mut set = HashSet::new(); - if let Some(i) = &self.default_legalize { - set.insert(*i); - } - if let Some(i) = &self.monomorphic_legalize { - set.insert(*i); - } - set.extend(self.typed_legalize.values().cloned()); - let mut ret = Vec::from_iter(set); - ret.sort(); - ret - } -} diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs index f7258ea300..50b3a82cc1 100644 --- a/cranelift/codegen/meta/src/cdsl/instructions.rs +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -5,7 +5,7 @@ use std::fmt::{Display, Error, Formatter}; use std::rc::Rc; use crate::cdsl::camel_case; -use crate::cdsl::formats::{FormatField, InstructionFormat}; +use crate::cdsl::formats::InstructionFormat; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint; use crate::cdsl::types::{LaneType, ReferenceType, ValueType}; @@ -21,46 +21,20 @@ pub(crate) type AllInstructions = PrimaryMap; pub(crate) struct InstructionGroupBuilder<'all_inst> { all_instructions: &'all_inst mut AllInstructions, - own_instructions: Vec, } impl<'all_inst> InstructionGroupBuilder<'all_inst> { pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self { Self { all_instructions, - own_instructions: Vec::new(), } } pub fn push(&mut self, builder: InstructionBuilder) { let opcode_number = OpcodeNumber(self.all_instructions.next_key().as_u32()); let inst = builder.build(opcode_number); - // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent. 
- self.own_instructions.push(inst.clone()); self.all_instructions.push(inst); } - - pub fn build(self) -> InstructionGroup { - InstructionGroup { - instructions: self.own_instructions, - } - } -} - -/// Every instruction must belong to exactly one instruction group. A given -/// target architecture can support instructions from multiple groups, and it -/// does not necessarily support all instructions in a group. -pub(crate) struct InstructionGroup { - instructions: Vec, -} - -impl InstructionGroup { - pub fn by_name(&self, name: &'static str) -> &Instruction { - self.instructions - .iter() - .find(|inst| inst.name == name) - .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name)) - } } /// Instructions can have parameters bound to them to specialize them for more specific encodings @@ -143,17 +117,6 @@ impl InstructionContent { &self.name } } - - pub fn all_typevars(&self) -> Vec<&TypeVar> { - match &self.polymorphic_info { - Some(poly) => { - let mut result = vec![&poly.ctrl_typevar]; - result.extend(&poly.other_typevars); - result - } - None => Vec::new(), - } - } } pub(crate) type Instruction = Rc; @@ -375,20 +338,6 @@ impl InstructionBuilder { } } -/// A thin wrapper like Option, but with more precise semantics. -#[derive(Clone)] -pub(crate) enum ValueTypeOrAny { - ValueType(ValueType), -} - -impl ValueTypeOrAny { - pub fn expect(self) -> ValueType { - match self { - ValueTypeOrAny::ValueType(vt) => vt, - } - } -} - /// An parameter used for binding instructions to specific types or values pub(crate) enum BindParameter { Lane(LaneType), @@ -439,7 +388,7 @@ impl Display for Immediate { #[derive(Clone)] pub(crate) struct BoundInstruction { pub inst: Instruction, - pub value_types: Vec, + pub value_types: Vec, pub immediate_values: Vec, } @@ -502,11 +451,11 @@ impl Bindable for BoundInstruction { match parameter.into() { BindParameter::Lane(lane_type) => modified .value_types - .push(ValueTypeOrAny::ValueType(lane_type.into())), + .push(lane_type.into()), BindParameter::Reference(reference_type) => { modified .value_types - .push(ValueTypeOrAny::ValueType(reference_type.into())); + .push(reference_type.into()); } } modified.verify_bindings().unwrap(); @@ -719,206 +668,6 @@ fn is_ctrl_typevar_candidate( Ok(other_typevars) } -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum FormatPredicateKind { - /// Is the field member equal to the expected value (stored here)? - IsEqual(String), -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) struct FormatPredicateNode { - format_name: &'static str, - member_name: &'static str, - kind: FormatPredicateKind, -} - -impl FormatPredicateNode { - fn new_raw( - format: &InstructionFormat, - member_name: &'static str, - kind: FormatPredicateKind, - ) -> Self { - Self { - format_name: format.name, - member_name, - kind, - } - } - - fn rust_predicate(&self) -> String { - match &self.kind { - FormatPredicateKind::IsEqual(arg) => { - format!("predicates::is_equal({}, {})", self.member_name, arg) - } - } - } -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum TypePredicateNode { - /// Is the value argument (at the index designated by the first member) the same type as the - /// type name (second member)? - TypeVarCheck(usize, String), - - /// Is the controlling type variable the same type as the one designated by the type name - /// (only member)? 
- CtrlTypeVarCheck(String), -} - -impl TypePredicateNode { - fn rust_predicate(&self, func_str: &str) -> String { - match self { - TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( - "{}.dfg.value_type(args[{}]) == {}", - func_str, index, value_type_name - ), - TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { - format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name) - } - } - } -} - -/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum InstructionPredicateNode { - FormatPredicate(FormatPredicateNode), - - TypePredicate(TypePredicateNode), - - /// An AND-combination of two or more other predicates. - And(Vec), -} - -impl InstructionPredicateNode { - fn rust_predicate(&self, func_str: &str) -> String { - match self { - InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), - InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str), - InstructionPredicateNode::And(nodes) => nodes - .iter() - .map(|x| x.rust_predicate(func_str)) - .collect::>() - .join(" && "), - } - } -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) struct InstructionPredicate { - node: Option, -} - -impl Into for InstructionPredicateNode { - fn into(self) -> InstructionPredicate { - InstructionPredicate { node: Some(self) } - } -} - -impl InstructionPredicate { - pub fn new() -> Self { - Self { node: None } - } - - pub fn new_typevar_check( - inst: &Instruction, - type_var: &TypeVar, - value_type: &ValueType, - ) -> InstructionPredicateNode { - let index = inst - .value_opnums - .iter() - .enumerate() - .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var) - .unwrap() - .0; - InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( - index, - value_type.rust_name(), - )) - } - - pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { - InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( - value_type.rust_name(), - )) - } - - /// Used only for the AST module, which directly passes in the format field. - pub fn new_is_field_equal_ast( - format: &InstructionFormat, - field: &FormatField, - imm_value: String, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( - format, - field.member, - FormatPredicateKind::IsEqual(imm_value), - )) - } - - pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { - let node = self.node; - let mut and_nodes = match node { - Some(node) => match node { - InstructionPredicateNode::And(nodes) => nodes, - _ => vec![node], - }, - _ => Vec::new(), - }; - and_nodes.push(new_node); - self.node = Some(InstructionPredicateNode::And(and_nodes)); - self - } - - pub fn rust_predicate(&self, func_str: &str) -> Option { - self.node.as_ref().map(|root| root.rust_predicate(func_str)) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct InstructionPredicateNumber(u32); -entity_impl!(InstructionPredicateNumber); - -pub(crate) type InstructionPredicateMap = - PrimaryMap; - -/// An instruction specification, containing an instruction that has bound types or not. 
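// [Editor's sketch, not part of the original patch.] The strings the removed
// predicate nodes above render: `And` joins its children with " && ", and the
// type predicates expand to dfg queries. The type names passed in are
// illustrative values only.
fn predicate_rendering_example() {
    let node = InstructionPredicateNode::And(vec![
        InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck(
            "ir::types::I32".into(),
        )),
        InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck(
            1,
            "ir::types::I64".into(),
        )),
    ]);
    let expected = "func.dfg.ctrl_typevar(inst) == ir::types::I32 && func.dfg.value_type(args[1]) == ir::types::I64";
    assert_eq!(node.rust_predicate("func"), expected);
}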
-pub(crate) enum InstSpec { - Inst(Instruction), - Bound(BoundInstruction), -} - -impl InstSpec { - pub fn inst(&self) -> &Instruction { - match &self { - InstSpec::Inst(inst) => inst, - InstSpec::Bound(bound_inst) => &bound_inst.inst, - } - } -} - -impl Bindable for InstSpec { - fn bind(&self, parameter: impl Into) -> BoundInstruction { - match self { - InstSpec::Inst(inst) => inst.bind(parameter.into()), - InstSpec::Bound(inst) => inst.bind(parameter.into()), - } - } -} - -impl Into for &Instruction { - fn into(self) -> InstSpec { - InstSpec::Inst(self.clone()) - } -} - -impl Into for BoundInstruction { - fn into(self) -> InstSpec { - InstSpec::Bound(self) - } -} - #[cfg(test)] mod test { use super::*; diff --git a/cranelift/codegen/meta/src/cdsl/isa.rs b/cranelift/codegen/meta/src/cdsl/isa.rs index 28844c97fb..7eb7c30517 100644 --- a/cranelift/codegen/meta/src/cdsl/isa.rs +++ b/cranelift/codegen/meta/src/cdsl/isa.rs @@ -1,89 +1,18 @@ -use std::collections::HashSet; -use std::iter::FromIterator; - -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionPredicateMap; -use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::IsaRegs; use crate::cdsl::settings::SettingGroup; -use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; pub(crate) struct TargetIsa { pub name: &'static str, pub settings: SettingGroup, pub regs: IsaRegs, - pub recipes: Recipes, - pub cpu_modes: Vec, - pub encodings_predicates: InstructionPredicateMap, - - /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the - /// local array of transform groups that are directly used. We use this map to get this - /// information. - pub local_transform_groups: Vec, } impl TargetIsa { - pub fn new( - name: &'static str, - settings: SettingGroup, - regs: IsaRegs, - recipes: Recipes, - cpu_modes: Vec, - encodings_predicates: InstructionPredicateMap, - ) -> Self { - // Compute the local TransformGroup index. - let mut local_transform_groups = Vec::new(); - for cpu_mode in &cpu_modes { - let transform_groups = cpu_mode.direct_transform_groups(); - for group_index in transform_groups { - // find() is fine here: the number of transform group is < 5 as of June 2019. - if local_transform_groups - .iter() - .find(|&val| group_index == *val) - .is_none() - { - local_transform_groups.push(group_index); - } - } - } - + pub fn new(name: &'static str, settings: SettingGroup, regs: IsaRegs) -> Self { Self { name, settings, regs, - recipes, - cpu_modes, - encodings_predicates, - local_transform_groups, } } - - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the - /// transitive set of TransformGroup this TargetIsa uses. - pub fn transitive_transform_groups( - &self, - all_groups: &TransformGroups, - ) -> Vec { - let mut set = HashSet::new(); - - for &root in self.local_transform_groups.iter() { - set.insert(root); - let mut base = root; - // Follow the chain of chain_with. - while let Some(chain_with) = &all_groups.get(base).chain_with { - set.insert(*chain_with); - base = *chain_with; - } - } - - let mut vec = Vec::from_iter(set); - vec.sort(); - vec - } - - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly - /// reachable set of TransformGroup this TargetIsa uses. 
- pub fn direct_transform_groups(&self) -> &Vec { - &self.local_transform_groups - } } diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs index bd08ebfe41..a1fb8d4ea3 100644 --- a/cranelift/codegen/meta/src/cdsl/mod.rs +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -3,20 +3,15 @@ //! This module defines the classes that are used to define Cranelift //! instructions and other entities. -#[macro_use] -pub mod ast; -pub mod cpu_modes; pub mod formats; pub mod instructions; pub mod isa; pub mod operands; -pub mod recipes; pub mod regs; pub mod settings; pub mod type_inference; pub mod types; pub mod typevar; -pub mod xform; /// A macro that converts boolean settings into predicates to look more natural. #[macro_export] diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs deleted file mode 100644 index ea9ce5f20b..0000000000 --- a/cranelift/codegen/meta/src/cdsl/recipes.rs +++ /dev/null @@ -1,165 +0,0 @@ -use std::rc::Rc; - -use cranelift_entity::{entity_impl, PrimaryMap}; - -use crate::cdsl::formats::InstructionFormat; -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::regs::RegClassIndex; -use crate::cdsl::settings::SettingPredicateNumber; - -/// A specific register in a register class. -/// -/// A register is identified by the top-level register class it belongs to and -/// its first register unit. -/// -/// Specific registers are used to describe constraints on instructions where -/// some operands must use a fixed register. -/// -/// Register instances can be created with the constructor, or accessed as -/// attributes on the register class: `GPR.rcx`. -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub(crate) struct Register { - pub regclass: RegClassIndex, - pub unit: u8, -} - -/// An operand that must be in a stack slot. -/// -/// A `Stack` object can be used to indicate an operand constraint for a value -/// operand that must live in a stack slot. -#[derive(Copy, Clone, Hash, PartialEq)] -pub(crate) struct Stack { - pub regclass: RegClassIndex, -} - -#[derive(Clone, Hash, PartialEq)] -pub(crate) struct BranchRange { - pub inst_size: u64, - pub range: u64, -} - -#[derive(Copy, Clone, Hash, PartialEq)] -pub(crate) enum OperandConstraint { - RegClass(RegClassIndex), - FixedReg(Register), - TiedInput(usize), - Stack(Stack), -} - -impl Into for RegClassIndex { - fn into(self) -> OperandConstraint { - OperandConstraint::RegClass(self) - } -} - -impl Into for Register { - fn into(self) -> OperandConstraint { - OperandConstraint::FixedReg(self) - } -} - -impl Into for usize { - fn into(self) -> OperandConstraint { - OperandConstraint::TiedInput(self) - } -} - -impl Into for Stack { - fn into(self) -> OperandConstraint { - OperandConstraint::Stack(self) - } -} - -/// A recipe for encoding instructions with a given format. -/// -/// Many different instructions can be encoded by the same recipe, but they -/// must all have the same instruction format. -/// -/// The `operands_in` and `operands_out` arguments are tuples specifying the register -/// allocation constraints for the value operands and results respectively. The -/// possible constraints for an operand are: -/// -/// - A `RegClass` specifying the set of allowed registers. -/// - A `Register` specifying a fixed-register operand. -/// - An integer indicating that this result is tied to a value operand, so -/// they must use the same register. -/// - A `Stack` specifying a value in a stack slot. 
-/// -/// The `branch_range` argument must be provided for recipes that can encode -/// branch instructions. It is an `(origin, bits)` tuple describing the exact -/// range that can be encoded in a branch instruction. -#[derive(Clone)] -pub(crate) struct EncodingRecipe { - /// Short mnemonic name for this recipe. - pub name: String, - - /// Associated instruction format. - pub format: Rc, - - /// Base number of bytes in the binary encoded instruction. - pub base_size: u64, - - /// Tuple of register constraints for value operands. - pub operands_in: Vec, - - /// Tuple of register constraints for results. - pub operands_out: Vec, - - /// Function name to use when computing actual size. - pub compute_size: &'static str, - - /// `(origin, bits)` range for branches. - pub branch_range: Option, - - /// This instruction clobbers `iflags` and `fflags`; true by default. - pub clobbers_flags: bool, - - /// Instruction predicate. - pub inst_predicate: Option, - - /// ISA predicate. - pub isa_predicate: Option, - - /// Rust code for binary emission. - pub emit: Option, -} - -// Implement PartialEq ourselves: take all the fields into account but the name. -impl PartialEq for EncodingRecipe { - fn eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.format, &other.format) - && self.base_size == other.base_size - && self.operands_in == other.operands_in - && self.operands_out == other.operands_out - && self.compute_size == other.compute_size - && self.branch_range == other.branch_range - && self.clobbers_flags == other.clobbers_flags - && self.inst_predicate == other.inst_predicate - && self.isa_predicate == other.isa_predicate - && self.emit == other.emit - } -} - -// To allow using it in a hashmap. -impl Eq for EncodingRecipe {} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct EncodingRecipeNumber(u32); -entity_impl!(EncodingRecipeNumber); - -pub(crate) type Recipes = PrimaryMap; - -#[derive(Clone)] -pub(crate) struct EncodingRecipeBuilder { - pub name: String, - format: Rc, - pub base_size: u64, - pub operands_in: Option>, - pub operands_out: Option>, - pub compute_size: Option<&'static str>, - pub branch_range: Option, - pub emit: Option, - clobbers_flags: Option, - inst_predicate: Option, - isa_predicate: Option, -} diff --git a/cranelift/codegen/meta/src/cdsl/type_inference.rs b/cranelift/codegen/meta/src/cdsl/type_inference.rs index e17c305f9c..76fc1284f2 100644 --- a/cranelift/codegen/meta/src/cdsl/type_inference.rs +++ b/cranelift/codegen/meta/src/cdsl/type_inference.rs @@ -1,8 +1,4 @@ -use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool}; -use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; +use crate::cdsl::typevar::TypeVar; #[derive(Debug, Hash, PartialEq, Eq)] pub(crate) enum Constraint { @@ -11,651 +7,4 @@ pub(crate) enum Constraint { /// 1) They have the same number of lanes /// 2) In a lane tv1 has at least as many bits as tv2. WiderOrEq(TypeVar, TypeVar), - - /// Constraint specifying that two derived type vars must have the same runtime type. - Eq(TypeVar, TypeVar), - - /// Constraint specifying that a type var must belong to some typeset. 
- InTypeset(TypeVar, TypeSet), -} - -impl Constraint { - fn translate_with TypeVar>(&self, func: F) -> Constraint { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - let lhs = func(&lhs); - let rhs = func(&rhs); - Constraint::WiderOrEq(lhs, rhs) - } - Constraint::Eq(lhs, rhs) => { - let lhs = func(&lhs); - let rhs = func(&rhs); - Constraint::Eq(lhs, rhs) - } - Constraint::InTypeset(tv, ts) => { - let tv = func(&tv); - Constraint::InTypeset(tv, ts.clone()) - } - } - } - - /// Creates a new constraint by replacing type vars by their hashmap equivalent. - fn translate_with_map( - &self, - original_to_own_typevar: &HashMap<&TypeVar, TypeVar>, - ) -> Constraint { - self.translate_with(|tv| substitute(original_to_own_typevar, tv)) - } - - /// Creates a new constraint by replacing type vars by their canonical equivalent. - fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint { - self.translate_with(|tv| type_env.get_equivalent(tv)) - } - - fn is_trivial(&self) -> bool { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - // Trivially true. - if lhs == rhs { - return true; - } - - let ts1 = lhs.get_typeset(); - let ts2 = rhs.get_typeset(); - - // Trivially true. - if ts1.is_wider_or_equal(&ts2) { - return true; - } - - // Trivially false. - if ts1.is_narrower(&ts2) { - return true; - } - - // Trivially false. - if (&ts1.lanes & &ts2.lanes).is_empty() { - return true; - } - - self.is_concrete() - } - Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(), - Constraint::InTypeset(_, _) => { - // The way InTypeset are made, they would always be trivial if we were applying the - // same logic as the Python code did, so ignore this. - self.is_concrete() - } - } - } - - /// Returns true iff all the referenced type vars are singletons. - fn is_concrete(&self) -> bool { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - lhs.singleton_type().is_some() && rhs.singleton_type().is_some() - } - Constraint::Eq(lhs, rhs) => { - lhs.singleton_type().is_some() && rhs.singleton_type().is_some() - } - Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(), - } - } - - fn typevar_args(&self) -> Vec<&TypeVar> { - match self { - Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs], - Constraint::Eq(lhs, rhs) => vec![lhs, rhs], - Constraint::InTypeset(tv, _) => vec![tv], - } - } -} - -#[derive(Clone, Copy)] -enum TypeEnvRank { - Singleton = 5, - Input = 4, - Intermediate = 3, - Output = 2, - Temp = 1, - Internal = 0, -} - -/// Class encapsulating the necessary bookkeeping for type inference. -pub(crate) struct TypeEnvironment { - vars: HashSet, - ranks: HashMap, - equivalency_map: HashMap, - pub constraints: Vec, -} - -impl TypeEnvironment { - fn new() -> Self { - TypeEnvironment { - vars: HashSet::new(), - ranks: HashMap::new(), - equivalency_map: HashMap::new(), - constraints: Vec::new(), - } - } - - fn register(&mut self, var_index: VarIndex, var: &mut Var) { - self.vars.insert(var_index); - let rank = if var.is_input() { - TypeEnvRank::Input - } else if var.is_intermediate() { - TypeEnvRank::Intermediate - } else if var.is_output() { - TypeEnvRank::Output - } else { - assert!(var.is_temp()); - TypeEnvRank::Temp - }; - self.ranks.insert(var.get_or_create_typevar(), rank); - } - - fn add_constraint(&mut self, constraint: Constraint) { - if self.constraints.iter().any(|item| *item == constraint) { - return; - } - - // Check extra conditions for InTypeset constraints. 
- if let Constraint::InTypeset(tv, _) = &constraint { - assert!( - tv.base.is_none(), - "type variable is {:?}, while expecting none", - tv - ); - assert!( - tv.name.starts_with("typeof_"), - "Name \"{}\" should start with \"typeof_\"", - tv.name - ); - } - - self.constraints.push(constraint); - } - - /// Returns the canonical representative of the equivalency class of the given argument, or - /// duplicates it if it's not there yet. - pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar { - let mut tv = tv; - while let Some(found) = self.equivalency_map.get(tv) { - tv = found; - } - match &tv.base { - Some(parent) => self - .get_equivalent(&parent.type_var) - .derived(parent.derived_func), - None => tv.clone(), - } - } - - /// Get the rank of tv in the partial order: - /// - TVs directly associated with a Var get their rank from the Var (see register()). - /// - Internally generated non-derived TVs implicitly get the lowest rank (0). - /// - Derived variables get their rank from their free typevar. - /// - Singletons have the highest rank. - /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with - /// temporary vars. - fn rank(&self, tv: &TypeVar) -> u8 { - let actual_tv = match tv.base { - Some(_) => tv.free_typevar(), - None => Some(tv.clone()), - }; - - let rank = match actual_tv { - Some(actual_tv) => match self.ranks.get(&actual_tv) { - Some(rank) => Some(*rank), - None => { - assert!( - !actual_tv.name.starts_with("typeof_"), - "variable {} should be explicitly ranked", - actual_tv.name - ); - None - } - }, - None => None, - }; - - let rank = match rank { - Some(rank) => rank, - None => { - if tv.singleton_type().is_some() { - TypeEnvRank::Singleton - } else { - TypeEnvRank::Internal - } - } - }; - - rank as u8 - } - - /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The - /// canonical representative of the merged class is tv2's canonical representative. - fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) { - assert!(tv1.base.is_none()); - assert!(self.get_equivalent(&tv1) == tv1); - if let Some(tv2_base) = &tv2.base { - // Ensure there are no cycles. - assert!(self.get_equivalent(&tv2_base.type_var) != tv1); - } - self.equivalency_map.insert(tv1, tv2); - } - - /// Get the free typevars in the current type environment. - pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec { - let mut typevars = Vec::new(); - typevars.extend(self.equivalency_map.keys().cloned()); - typevars.extend( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let set: HashSet = HashSet::from_iter( - typevars - .iter() - .map(|tv| self.get_equivalent(tv).free_typevar()) - .filter(|opt_tv| { - // Filter out singleton types. - opt_tv.is_some() - }) - .map(|tv| tv.unwrap()), - ); - Vec::from_iter(set) - } - - /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a - /// single type var derived from them or equivalent to them. - /// - /// e.g. if we have a root of the tree that looks like: - /// - /// typeof_a typeof_b - /// \\ / - /// typeof_x - /// | - /// half_width(1) - /// | - /// 1 - /// - /// we want to collapse the linear path between 1 and typeof_x. 
The resulting graph is: - /// - /// typeof_a typeof_b - /// \\ / - /// typeof_x - fn normalize(&mut self, var_pool: &mut VarPool) { - let source_tvs: HashSet = HashSet::from_iter( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let mut children: HashMap> = HashMap::new(); - - // Insert all the parents found by the derivation relationship. - for type_var in self.equivalency_map.values() { - if type_var.base.is_none() { - continue; - } - - let parent_tv = type_var.free_typevar(); - if parent_tv.is_none() { - // Ignore this type variable, it's a singleton. - continue; - } - let parent_tv = parent_tv.unwrap(); - - children - .entry(parent_tv) - .or_insert_with(HashSet::new) - .insert(type_var.clone()); - } - - // Insert all the explicit equivalency links. - for (equivalent_tv, canon_tv) in self.equivalency_map.iter() { - children - .entry(canon_tv.clone()) - .or_insert_with(HashSet::new) - .insert(equivalent_tv.clone()); - } - - // Remove links that are straight paths up to typevar of variables. - for free_root in self.free_typevars(var_pool) { - let mut root = &free_root; - while !source_tvs.contains(&root) - && children.contains_key(&root) - && children.get(&root).unwrap().len() == 1 - { - let child = children.get(&root).unwrap().iter().next().unwrap(); - assert_eq!(self.equivalency_map[child], root.clone()); - self.equivalency_map.remove(child); - root = child; - } - } - } - - /// Extract a clean type environment from self, that only mentions type vars associated with - /// real variables. - fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment { - let vars_tv: HashSet = HashSet::from_iter( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let mut new_equivalency_map: HashMap = HashMap::new(); - for tv in &vars_tv { - let canon_tv = self.get_equivalent(tv); - if *tv != canon_tv { - new_equivalency_map.insert(tv.clone(), canon_tv.clone()); - } - - // Sanity check: the translated type map should only refer to real variables. - assert!(vars_tv.contains(tv)); - let canon_free_tv = canon_tv.free_typevar(); - assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap())); - } - - let mut new_constraints: HashSet = HashSet::new(); - for constraint in &self.constraints { - let constraint = constraint.translate_with_env(&self); - if constraint.is_trivial() || new_constraints.contains(&constraint) { - continue; - } - - // Sanity check: translated constraints should refer only to real variables. - for arg in constraint.typevar_args() { - let arg_free_tv = arg.free_typevar(); - assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); - } - - new_constraints.insert(constraint); - } - - TypeEnvironment { - vars: self.vars, - ranks: self.ranks, - equivalency_map: new_equivalency_map, - constraints: Vec::from_iter(new_constraints), - } - } -} - -/// Replaces an external type variable according to the following rules: -/// - if a local copy is present in the map, return it. -/// - or if it's derived, create a local derived one that recursively substitutes the parent. -/// - or return itself. 
-fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar { - match map.get(&external_type_var) { - Some(own_type_var) => own_type_var.clone(), - None => match &external_type_var.base { - Some(parent) => { - let parent_substitute = substitute(map, &parent.type_var); - TypeVar::derived(&parent_substitute, parent.derived_func) - } - None => external_type_var.clone(), - }, - } -} - -/// Normalize a (potentially derived) typevar using the following rules: -/// -/// - vector and width derived functions commute -/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> -/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) -/// -/// - half/double pairs collapse -/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base -/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base -fn canonicalize_derivations(tv: TypeVar) -> TypeVar { - let base = match &tv.base { - Some(base) => base, - None => return tv, - }; - - let derived_func = base.derived_func; - - if let Some(base_base) = &base.type_var.base { - let base_base_tv = &base_base.type_var; - match (derived_func, base_base.derived_func) { - (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth) - | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth) - | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => { - // Cancelling bijective transformations. This doesn't hide any overflow issues - // since derived type sets are checked upon derivaion, and base typesets are only - // allowed to shrink. - return canonicalize_derivations(base_base_tv.clone()); - } - (DerivedFunc::HalfWidth, DerivedFunc::HalfVector) - | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => { - // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute. - return canonicalize_derivations( - base_base_tv - .derived(derived_func) - .derived(base_base.derived_func), - ); - } - _ => {} - }; - } - - canonicalize_derivations(base.type_var.clone()).derived(derived_func) -} - -/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets -/// to be the same. When one is derived from the other, repeat the constrain process until -/// a fixed point is reached. -fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) { - loop { - let old_tv1_ts = tv1.get_typeset().clone(); - tv2.constrain_types(tv1.clone()); - if tv1.get_typeset() == old_tv1_ts { - break; - } - } - - let old_tv2_ts = tv2.get_typeset(); - tv1.constrain_types(tv2.clone()); - // The above loop should ensure that all reference cycles have been handled. - assert!(old_tv2_ts == tv2.get_typeset()); -} - -/// Unify tv1 and tv2 in the given type environment. tv1 must have a rank greater or equal to tv2's -/// one, modulo commutations. -fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> { - let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1)); - let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2)); - - if tv1 == tv2 { - // Already unified. - return Ok(()); - } - - if type_env.rank(&tv2) < type_env.rank(&tv1) { - // Make sure tv1 always has the smallest rank, since real variables have the higher rank - // and we want them to be the canonical representatives of their equivalency classes. 
- return unify(&tv2, &tv1, type_env); - } - - constrain_fixpoint(&tv1, &tv2); - - if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 { - return Err(format!( - "Error: empty type created when unifying {} and {}", - tv1.name, tv2.name - )); - } - - let base = match &tv1.base { - Some(base) => base, - None => { - type_env.record_equivalent(tv1, tv2); - return Ok(()); - } - }; - - if let Some(inverse) = base.derived_func.inverse() { - return unify(&base.type_var, &tv2.derived(inverse), type_env); - } - - type_env.add_constraint(Constraint::Eq(tv1, tv2)); - Ok(()) -} - -/// Perform type inference on one Def in the current type environment and return an updated type -/// environment or error. -/// -/// At a high level this works by creating fresh copies of each formal type var in the Def's -/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar. -fn infer_definition( - def: &Def, - var_pool: &mut VarPool, - type_env: TypeEnvironment, - last_type_index: &mut usize, -) -> Result { - let apply = &def.apply; - let inst = &apply.inst; - - let mut type_env = type_env; - let free_formal_tvs = inst.all_typevars(); - - let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new(); - for &tv in &free_formal_tvs { - assert!(original_to_own_typevar - .insert( - tv, - TypeVar::copy_from(tv, format!("own_{}", last_type_index)) - ) - .is_none()); - *last_type_index += 1; - } - - // Update the mapping with any explicity bound type vars: - for (i, value_type) in apply.value_types.iter().enumerate() { - let singleton = TypeVar::new_singleton(value_type.clone()); - assert!(original_to_own_typevar - .insert(free_formal_tvs[i], singleton) - .is_some()); - } - - // Get fresh copies for each typevar in the signature (both free and derived). - let mut formal_tvs = Vec::new(); - formal_tvs.extend(inst.value_results.iter().map(|&i| { - substitute( - &original_to_own_typevar, - inst.operands_out[i].type_var().unwrap(), - ) - })); - formal_tvs.extend(inst.value_opnums.iter().map(|&i| { - substitute( - &original_to_own_typevar, - inst.operands_in[i].type_var().unwrap(), - ) - })); - - // Get the list of actual vars. - let mut actual_vars = Vec::new(); - actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i])); - actual_vars.extend( - inst.value_opnums - .iter() - .map(|&i| apply.args[i].unwrap_var()), - ); - - // Get the list of the actual TypeVars. - let mut actual_tvs = Vec::new(); - for var_index in actual_vars { - let var = var_pool.get_mut(var_index); - type_env.register(var_index, var); - actual_tvs.push(var.get_or_create_typevar()); - } - - // Make sure we start unifying with the control type variable first, by putting it at the - // front of both vectors. - if let Some(poly) = &inst.polymorphic_info { - let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar]; - let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap(); - if ctrl_index != 0 { - formal_tvs.swap(0, ctrl_index); - actual_tvs.swap(0, ctrl_index); - } - } - - // Unify each actual type variable with the corresponding formal type variable. - for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) { - if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) { - return Err(format!( - "fail ti on {} <: {}: {}", - actual_tv.name, formal_tv.name, msg - )); - } - } - - // Add any instruction specific constraints. 
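[Editor's sketch] At its core, the `unify` shown above intersects the typesets of two variables (via `constrain_fixpoint`) and reports an error when the intersection is empty; the rest is bookkeeping about ranks and derived typevars. A toy version over plain sets of integer widths, with invented names, illustrates that central step.

use std::collections::BTreeSet;

struct TypeVarSketch {
    name: String,
    widths: BTreeSet<u16>,
}

/// Constrain both variables to the widths they have in common and fail if
/// nothing is left, echoing the "empty type created when unifying" error in
/// the removed code.
fn unify(a: &mut TypeVarSketch, b: &mut TypeVarSketch) -> Result<(), String> {
    let common: BTreeSet<u16> = a.widths.intersection(&b.widths).copied().collect();
    if common.is_empty() {
        return Err(format!(
            "empty type created when unifying {} and {}",
            a.name, b.name
        ));
    }
    a.widths = common.clone();
    b.widths = common;
    Ok(())
}

fn main() {
    let mut x = TypeVarSketch {
        name: "typeof_x".to_string(),
        widths: [8, 16, 32, 64].into_iter().collect(),
    };
    let mut y = TypeVarSketch {
        name: "typeof_y".to_string(),
        widths: [32, 64, 128].into_iter().collect(),
    };
    unify(&mut x, &mut y).unwrap();
    let expected: BTreeSet<u16> = [32, 64].into_iter().collect();
    assert_eq!(x.widths, expected);

    // Unifying with a variable that only admits i8 leaves nothing in common.
    let mut z = TypeVarSketch {
        name: "typeof_z".to_string(),
        widths: [8].into_iter().collect(),
    };
    assert!(unify(&mut x, &mut z).is_err());
}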
- for constraint in &inst.constraints { - type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar)); - } - - Ok(type_env) -} - -/// Perform type inference on an transformation. Return an updated type environment or error. -pub(crate) fn infer_transform( - src: DefIndex, - dst: &[DefIndex], - def_pool: &DefPool, - var_pool: &mut VarPool, -) -> Result { - let mut type_env = TypeEnvironment::new(); - let mut last_type_index = 0; - - // Execute type inference on the source pattern. - type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index) - .map_err(|err| format!("In src pattern: {}", err))?; - - // Collect the type sets once after applying the source patterm; we'll compare the typesets - // after we've also considered the destination pattern, and will emit supplementary InTypeset - // checks if they don't match. - let src_typesets = type_env - .vars - .iter() - .map(|&var_index| { - let var = var_pool.get_mut(var_index); - let tv = type_env.get_equivalent(&var.get_or_create_typevar()); - (var_index, tv.get_typeset()) - }) - .collect::>(); - - // Execute type inference on the destination pattern. - for (i, &def_index) in dst.iter().enumerate() { - let def = def_pool.get(def_index); - type_env = infer_definition(def, var_pool, type_env, &mut last_type_index) - .map_err(|err| format!("line {}: {}", i, err))?; - } - - for (var_index, src_typeset) in src_typesets { - let var = var_pool.get(var_index); - if !var.has_free_typevar() { - continue; - } - let tv = type_env.get_equivalent(&var.get_typevar().unwrap()); - let new_typeset = tv.get_typeset(); - assert!( - new_typeset.is_subset(&src_typeset), - "type sets can only get narrower" - ); - if new_typeset != src_typeset { - type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone())); - } - } - - type_env.normalize(var_pool); - - Ok(type_env.extract(var_pool)) } diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs index c1027bf847..af1ba966a4 100644 --- a/cranelift/codegen/meta/src/cdsl/typevar.rs +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::{BTreeSet, HashSet}; +use std::collections::BTreeSet; use std::fmt; use std::hash; use std::iter::FromIterator; @@ -269,52 +269,6 @@ impl TypeVar { pub fn merge_lanes(&self) -> TypeVar { self.derived(DerivedFunc::MergeLanes) } - - /// Constrain the range of types this variable can assume to a subset of those in the typeset - /// ts. - /// May mutate itself if it's not derived, or its parent if it is. - pub fn constrain_types_by_ts(&self, type_set: TypeSet) { - match &self.base { - Some(base) => { - base.type_var - .constrain_types_by_ts(type_set.preimage(base.derived_func)); - } - None => { - self.content - .borrow_mut() - .type_set - .inplace_intersect_with(&type_set); - } - } - } - - /// Constrain the range of types this variable can assume to a subset of those `other` can - /// assume. - /// May mutate itself if it's not derived, or its parent if it is. - pub fn constrain_types(&self, other: TypeVar) { - if self == &other { - return; - } - self.constrain_types_by_ts(other.get_typeset()); - } - - /// Get a Rust expression that computes the type of this type variable. 
- pub fn to_rust_code(&self) -> String { - match &self.base { - Some(base) => format!( - "{}.{}().unwrap()", - base.type_var.to_rust_code(), - base.derived_func.name() - ), - None => { - if let Some(singleton) = self.singleton_type() { - singleton.rust_name() - } else { - self.name.clone() - } - } - } - } } impl Into for &TypeVar { @@ -392,19 +346,6 @@ impl DerivedFunc { DerivedFunc::MergeLanes => "merge_lanes", } } - - /// Returns the inverse function of this one, if it is a bijection. - pub fn inverse(self) -> Option { - match self { - DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth), - DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), - DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), - DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), - DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes), - DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes), - _ => None, - } - } } #[derive(Debug, Hash)] @@ -594,94 +535,6 @@ impl TypeSet { assert_eq!(types.len(), 1); types.remove(0) } - - /// Return the inverse image of self across the derived function func. - fn preimage(&self, func: DerivedFunc) -> TypeSet { - if self.size() == 0 { - // The inverse of the empty set is itself. - return self.clone(); - } - - match func { - DerivedFunc::LaneOf => { - let mut copy = self.clone(); - copy.lanes = - NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i))); - copy - } - DerivedFunc::AsBool => { - let mut copy = self.clone(); - if self.bools.contains(&1) { - copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); - copy.floats = NumSet::from_iter(vec![32, 64]); - } else { - copy.ints = &self.bools - &NumSet::from_iter(vec![1]); - copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]); - // If b1 is not in our typeset, than lanes=1 cannot be in the pre-image, as - // as_bool() of scalars is always b1. 
- copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]); - } - copy - } - DerivedFunc::HalfWidth => self.double_width(), - DerivedFunc::DoubleWidth => self.half_width(), - DerivedFunc::HalfVector => self.double_vector(), - DerivedFunc::DoubleVector => self.half_vector(), - DerivedFunc::SplitLanes => self.double_width().half_vector(), - DerivedFunc::MergeLanes => self.half_width().double_vector(), - } - } - - pub fn inplace_intersect_with(&mut self, other: &TypeSet) { - self.lanes = &self.lanes & &other.lanes; - self.ints = &self.ints & &other.ints; - self.floats = &self.floats & &other.floats; - self.bools = &self.bools & &other.bools; - self.refs = &self.refs & &other.refs; - - let mut new_specials = Vec::new(); - for spec in &self.specials { - if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) { - new_specials.push(*spec); - } - } - self.specials = new_specials; - } - - pub fn is_subset(&self, other: &TypeSet) -> bool { - self.lanes.is_subset(&other.lanes) - && self.ints.is_subset(&other.ints) - && self.floats.is_subset(&other.floats) - && self.bools.is_subset(&other.bools) - && self.refs.is_subset(&other.refs) - && { - let specials: HashSet = HashSet::from_iter(self.specials.clone()); - let other_specials = HashSet::from_iter(other.specials.clone()); - specials.is_subset(&other_specials) - } - } - - pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool { - set_wider_or_equal(&self.ints, &other.ints) - && set_wider_or_equal(&self.floats, &other.floats) - && set_wider_or_equal(&self.bools, &other.bools) - && set_wider_or_equal(&self.refs, &other.refs) - } - - pub fn is_narrower(&self, other: &TypeSet) -> bool { - set_narrower(&self.ints, &other.ints) - && set_narrower(&self.floats, &other.floats) - && set_narrower(&self.bools, &other.bools) - && set_narrower(&self.refs, &other.refs) - } -} - -fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool { - !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max() -} - -fn set_narrower(s1: &NumSet, s2: &NumSet) -> bool { - !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max() } impl fmt::Debug for TypeSet { @@ -806,18 +659,6 @@ impl TypeSetBuilder { self.specials, ) } - - pub fn all() -> TypeSet { - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - .bools(Interval::All) - .refs(Interval::All) - .simd_lanes(Interval::All) - .specials(ValueType::all_special_types().collect()) - .includes_scalars(true) - .build() - } } #[derive(PartialEq)] @@ -1054,135 +895,6 @@ fn test_forward_images() { ); } -#[test] -fn test_backward_images() { - let empty_set = TypeSetBuilder::new().build(); - - // LaneOf. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..1) - .ints(8..8) - .floats(32..32) - .build() - .preimage(DerivedFunc::LaneOf), - TypeSetBuilder::new() - .simd_lanes(Interval::All) - .ints(8..8) - .floats(32..32) - .build() - ); - assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set); - - // AsBool. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..4) - .bools(1..128) - .build() - .preimage(DerivedFunc::AsBool), - TypeSetBuilder::new() - .simd_lanes(1..4) - .ints(Interval::All) - .bools(Interval::All) - .floats(Interval::All) - .build() - ); - - // Double vector. 
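[Editor's sketch] The `preimage` code above answers "which types map into this set under a given derived function?". For the width-changing functions that reduces to doubling or halving every width and discarding anything that falls outside the representable range. A self-contained sketch follows, with `max` and `min` as illustrative bounds rather than the crate's actual limits.

use std::collections::BTreeSet;

/// Double every width in the set, dropping results above `max`. Computing the
/// preimage of `HalfWidth` boils down to this: the types whose half width
/// lands in the original set.
fn double_width(widths: &BTreeSet<u16>, max: u16) -> BTreeSet<u16> {
    widths.iter().map(|w| w * 2).filter(|w| *w <= max).collect()
}

/// Halve every width, dropping results below `min`; this corresponds to the
/// preimage of `DoubleWidth`.
fn half_width(widths: &BTreeSet<u16>, min: u16) -> BTreeSet<u16> {
    widths.iter().map(|w| w / 2).filter(|w| *w >= min).collect()
}

fn main() {
    let ints: BTreeSet<u16> = [8, 16, 32, 64, 128].into_iter().collect();
    // Types whose half width is in {8..128}: i8 has no half width, so 8 drops out.
    assert_eq!(
        double_width(&ints, 128),
        [16, 32, 64, 128].into_iter().collect::<BTreeSet<u16>>()
    );
    // Types whose double width is in {8..128}: i128 has no double width, so 128 drops out.
    assert_eq!(
        half_width(&ints, 8),
        [8, 16, 32, 64].into_iter().collect::<BTreeSet<u16>>()
    );
}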
- assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..1) - .ints(8..8) - .build() - .preimage(DerivedFunc::DoubleVector) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..16) - .floats(32..32) - .build() - .preimage(DerivedFunc::DoubleVector), - TypeSetBuilder::new() - .simd_lanes(1..8) - .ints(8..16) - .floats(32..32) - .build(), - ); - - // Half vector. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(256..256) - .ints(8..8) - .build() - .preimage(DerivedFunc::HalfVector) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(64..128) - .bools(1..32) - .build() - .preimage(DerivedFunc::HalfVector), - TypeSetBuilder::new() - .simd_lanes(128..256) - .bools(1..32) - .build(), - ); - - // Half width. - assert_eq!( - TypeSetBuilder::new() - .ints(128..128) - .floats(64..64) - .bools(128..128) - .build() - .preimage(DerivedFunc::HalfWidth) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(64..256) - .bools(1..64) - .build() - .preimage(DerivedFunc::HalfWidth), - TypeSetBuilder::new() - .simd_lanes(64..256) - .bools(16..128) - .build(), - ); - - // Double width. - assert_eq!( - TypeSetBuilder::new() - .ints(8..8) - .floats(32..32) - .bools(1..8) - .build() - .preimage(DerivedFunc::DoubleWidth) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..16) - .floats(32..64) - .build() - .preimage(DerivedFunc::DoubleWidth), - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..8) - .floats(32..32) - .build() - ); -} #[test] #[should_panic] diff --git a/cranelift/codegen/meta/src/cdsl/xform.rs b/cranelift/codegen/meta/src/cdsl/xform.rs deleted file mode 100644 index da0cc5ba71..0000000000 --- a/cranelift/codegen/meta/src/cdsl/xform.rs +++ /dev/null @@ -1,478 +0,0 @@ -use crate::cdsl::ast::{ - Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition, - VarIndex, VarPool, -}; -use crate::cdsl::instructions::Instruction; -use crate::cdsl::type_inference::{infer_transform, TypeEnvironment}; -use crate::cdsl::typevar::TypeVar; - -use cranelift_entity::{entity_impl, PrimaryMap}; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; - -/// An instruction transformation consists of a source and destination pattern. -/// -/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A -/// pattern may optionally have a sequence of TypeConstraints, that additionally limit the set of -/// cases when it applies. -/// -/// The source pattern can contain only a single instruction. -pub(crate) struct Transform { - pub src: DefIndex, - pub dst: Vec, - pub var_pool: VarPool, - pub def_pool: DefPool, - pub block_pool: BlockPool, - pub const_pool: ConstPool, - pub type_env: TypeEnvironment, -} - -type SymbolTable = HashMap; - -impl Transform { - fn new(src: DummyDef, dst: Vec) -> Self { - let mut var_pool = VarPool::new(); - let mut def_pool = DefPool::new(); - let mut block_pool = BlockPool::new(); - let mut const_pool = ConstPool::new(); - - let mut input_vars: Vec = Vec::new(); - let mut defined_vars: Vec = Vec::new(); - - // Maps variable names to our own Var copies. - let mut symbol_table: SymbolTable = SymbolTable::new(); - - // Rewrite variables in src and dst using our own copies. 
- let src = rewrite_def_list( - PatternPosition::Source, - vec![src], - &mut symbol_table, - &mut input_vars, - &mut defined_vars, - &mut var_pool, - &mut def_pool, - &mut block_pool, - &mut const_pool, - )[0]; - - let num_src_inputs = input_vars.len(); - - let dst = rewrite_def_list( - PatternPosition::Destination, - dst, - &mut symbol_table, - &mut input_vars, - &mut defined_vars, - &mut var_pool, - &mut def_pool, - &mut block_pool, - &mut const_pool, - ); - - // Sanity checks. - for &var_index in &input_vars { - assert!( - var_pool.get(var_index).is_input(), - "'{:?}' used as both input and def", - var_pool.get(var_index) - ); - } - assert!( - input_vars.len() == num_src_inputs, - "extra input vars in dst pattern: {:?}", - input_vars - .iter() - .map(|&i| var_pool.get(i)) - .skip(num_src_inputs) - .collect::>() - ); - - // Perform type inference and cleanup. - let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap(); - - // Sanity check: the set of inferred free type variables should be a subset of the type - // variables corresponding to Vars appearing in the source pattern. - { - let free_typevars: HashSet = - HashSet::from_iter(type_env.free_typevars(&mut var_pool)); - let src_tvs = HashSet::from_iter( - input_vars - .clone() - .iter() - .chain( - defined_vars - .iter() - .filter(|&&var_index| !var_pool.get(var_index).is_temp()), - ) - .map(|&var_index| var_pool.get(var_index).get_typevar()) - .filter(|maybe_var| maybe_var.is_some()) - .map(|var| var.unwrap()), - ); - if !free_typevars.is_subset(&src_tvs) { - let missing_tvs = (&free_typevars - &src_tvs) - .iter() - .map(|tv| tv.name.clone()) - .collect::>() - .join(", "); - panic!("Some free vars don't appear in src: {}", missing_tvs); - } - } - - for &var_index in input_vars.iter().chain(defined_vars.iter()) { - let var = var_pool.get_mut(var_index); - let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar()); - var.set_typevar(canon_tv); - } - - Self { - src, - dst, - var_pool, - def_pool, - block_pool, - const_pool, - type_env, - } - } - - fn verify_legalize(&self) { - let def = self.def_pool.get(self.src); - for &var_index in def.defined_vars.iter() { - let defined_var = self.var_pool.get(var_index); - assert!( - defined_var.is_output(), - "{:?} not defined in the destination pattern", - defined_var - ); - } - } -} - -/// Inserts, if not present, a name in the `symbol_table`. Then returns its index in the variable -/// pool `var_pool`. If the variable was not present in the symbol table, then add it to the list of -/// `defined_vars`. -fn var_index( - name: &str, - symbol_table: &mut SymbolTable, - defined_vars: &mut Vec, - var_pool: &mut VarPool, -) -> VarIndex { - let name = name.to_string(); - match symbol_table.get(&name) { - Some(&existing_var) => existing_var, - None => { - // Materialize the variable. - let new_var = var_pool.create(name.clone()); - symbol_table.insert(name, new_var); - defined_vars.push(new_var); - new_var - } - } -} - -/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals. 
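[Editor's sketch] The `var_index` helper above is a small interning routine: look the name up in the symbol table, and on first use create the variable and remember that this pattern defined it. The standalone sketch below uses plain `usize` indices in place of `VarIndex` entities.

use std::collections::HashMap;

/// Return the index for `name`, creating it on first use. Newly created names
/// are also recorded in `defined_vars`, mirroring the removed helper.
fn var_index(
    name: &str,
    symbol_table: &mut HashMap<String, usize>,
    defined_vars: &mut Vec<usize>,
    next_index: &mut usize,
) -> usize {
    if let Some(&existing) = symbol_table.get(name) {
        return existing;
    }
    let new_var = *next_index;
    *next_index += 1;
    symbol_table.insert(name.to_string(), new_var);
    defined_vars.push(new_var);
    new_var
}

fn main() {
    let mut symbols = HashMap::new();
    let mut defined = Vec::new();
    let mut next = 0;
    let x = var_index("x", &mut symbols, &mut defined, &mut next);
    let y = var_index("y", &mut symbols, &mut defined, &mut next);
    // A second mention of "x" resolves to the same variable and defines nothing new.
    assert_eq!(var_index("x", &mut symbols, &mut defined, &mut next), x);
    assert_eq!((x, y), (0, 1));
    assert_eq!(defined.len(), 2);
}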
-fn rewrite_defined_vars( - position: PatternPosition, - dummy_def: &DummyDef, - def_index: DefIndex, - symbol_table: &mut SymbolTable, - defined_vars: &mut Vec, - var_pool: &mut VarPool, -) -> Vec { - let mut new_defined_vars = Vec::new(); - for var in &dummy_def.defined_vars { - let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool); - var_pool.get_mut(own_var).set_def(position, def_index); - new_defined_vars.push(own_var); - } - new_defined_vars -} - -/// Find all uses of variables in `expr` and replace them with our own local symbols. -fn rewrite_expr( - position: PatternPosition, - dummy_expr: DummyExpr, - symbol_table: &mut SymbolTable, - input_vars: &mut Vec, - var_pool: &mut VarPool, - const_pool: &mut ConstPool, -) -> Apply { - let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr - { - (apply_target, dummy_args) - } else { - panic!("we only rewrite apply expressions"); - }; - - assert_eq!( - apply_target.inst().operands_in.len(), - dummy_args.len(), - "number of arguments in instruction {} is incorrect\nexpected: {:?}", - apply_target.inst().name, - apply_target - .inst() - .operands_in - .iter() - .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type)) - .collect::>(), - ); - - let mut args = Vec::new(); - for (i, arg) in dummy_args.into_iter().enumerate() { - match arg { - DummyExpr::Var(var) => { - let own_var = var_index(&var.name, symbol_table, input_vars, var_pool); - let var = var_pool.get(own_var); - assert!( - var.is_input() || var.get_def(position).is_some(), - "{:?} used as both input and def", - var - ); - args.push(Expr::Var(own_var)); - } - DummyExpr::Literal(literal) => { - assert!(!apply_target.inst().operands_in[i].is_value()); - args.push(Expr::Literal(literal)); - } - DummyExpr::Constant(constant) => { - let const_name = const_pool.insert(constant.0); - // Here we abuse var_index by passing an empty, immediately-dropped vector to - // `defined_vars`; the reason for this is that unlike the `Var` case above, - // constants will create a variable that is not an input variable (it is tracked - // instead by ConstPool). - let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool); - args.push(Expr::Var(const_var)); - } - DummyExpr::Apply(..) => { - panic!("Recursive apply is not allowed."); - } - DummyExpr::Block(_block) => { - panic!("Blocks are not valid arguments."); - } - } - } - - Apply::new(apply_target, args) -} - -#[allow(clippy::too_many_arguments)] -fn rewrite_def_list( - position: PatternPosition, - dummy_defs: Vec, - symbol_table: &mut SymbolTable, - input_vars: &mut Vec, - defined_vars: &mut Vec, - var_pool: &mut VarPool, - def_pool: &mut DefPool, - block_pool: &mut BlockPool, - const_pool: &mut ConstPool, -) -> Vec { - let mut new_defs = Vec::new(); - // Register variable names of new blocks first as a block name can be used to jump forward. Thus - // the name has to be registered first to avoid misinterpreting it as an input-var. - for dummy_def in dummy_defs.iter() { - if let DummyExpr::Block(ref var) = dummy_def.expr { - var_index(&var.name, symbol_table, defined_vars, var_pool); - } - } - - // Iterate over the definitions and blocks, to map variables names to inputs or outputs. 
- for dummy_def in dummy_defs { - let def_index = def_pool.next_index(); - - let new_defined_vars = rewrite_defined_vars( - position, - &dummy_def, - def_index, - symbol_table, - defined_vars, - var_pool, - ); - if let DummyExpr::Block(var) = dummy_def.expr { - let var_index = *symbol_table - .get(&var.name) - .or_else(|| { - panic!( - "Block {} was not registered during the first visit", - var.name - ) - }) - .unwrap(); - var_pool.get_mut(var_index).set_def(position, def_index); - block_pool.create_block(var_index, def_index); - } else { - let new_apply = rewrite_expr( - position, - dummy_def.expr, - symbol_table, - input_vars, - var_pool, - const_pool, - ); - - assert!( - def_pool.next_index() == def_index, - "shouldn't have created new defs in the meanwhile" - ); - assert_eq!( - new_apply.inst.value_results.len(), - new_defined_vars.len(), - "number of Var results in instruction is incorrect" - ); - - new_defs.push(def_pool.create_inst(new_apply, new_defined_vars)); - } - } - new_defs -} - -/// A group of related transformations. -pub(crate) struct TransformGroup { - pub name: &'static str, - pub doc: &'static str, - pub chain_with: Option, - pub isa_name: Option<&'static str>, - pub id: TransformGroupIndex, - - /// Maps Instruction camel_case names to custom legalization functions names. - pub custom_legalizes: HashMap, - pub transforms: Vec, -} - -impl TransformGroup { - pub fn rust_name(&self) -> String { - match self.isa_name { - Some(_) => { - // This is a function in the same module as the LEGALIZE_ACTIONS table referring to - // it. - self.name.to_string() - } - None => format!("crate::legalizer::{}", self.name), - } - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct TransformGroupIndex(u32); -entity_impl!(TransformGroupIndex); - -pub(crate) struct TransformGroupBuilder { - name: &'static str, - doc: &'static str, - chain_with: Option, - isa_name: Option<&'static str>, - pub custom_legalizes: HashMap, - pub transforms: Vec, -} - -impl TransformGroupBuilder { - pub fn new(name: &'static str, doc: &'static str) -> Self { - Self { - name, - doc, - chain_with: None, - isa_name: None, - custom_legalizes: HashMap::new(), - transforms: Vec::new(), - } - } - - pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self { - assert!(self.chain_with.is_none()); - self.chain_with = Some(next_id); - self - } - - /// Add a custom legalization action for `inst`. - /// - /// The `func_name` parameter is the fully qualified name of a Rust function which takes the - /// same arguments as the `isa::Legalize` actions. - /// - /// The custom function will be called to legalize `inst` and any return value is ignored. - pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) { - assert!( - self.custom_legalizes - .insert(inst.camel_name.clone(), func_name) - .is_none(), - "custom legalization action for {} inserted twice", - inst.name - ); - } - - /// Add a legalization pattern to this group. 
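[Editor's sketch] `custom_legalize` in the removed builder is essentially an insert-once table from instruction name to the Rust function that legalizes it, panicking if the same instruction is registered twice. A minimal equivalent with a plain `HashMap` (the helper name here is hypothetical):

use std::collections::HashMap;

/// Record a custom legalization function for an instruction name, panicking
/// on a second registration for the same instruction, as the removed
/// `custom_legalize` does.
fn register_custom_legalize(
    table: &mut HashMap<String, &'static str>,
    inst: &str,
    func_name: &'static str,
) {
    assert!(
        table.insert(inst.to_string(), func_name).is_none(),
        "custom legalization action for {} inserted twice",
        inst
    );
}

fn main() {
    let mut table = HashMap::new();
    register_custom_legalize(&mut table, "trapz", "expand_cond_trap");
    register_custom_legalize(&mut table, "br_table", "expand_br_table");
    assert_eq!(table["trapz"], "expand_cond_trap");
    // Registering "trapz" a second time would hit the assert and panic.
}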
- pub fn legalize(&mut self, src: DummyDef, dst: Vec) { - let transform = Transform::new(src, dst); - transform.verify_legalize(); - self.transforms.push(transform); - } - - pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex { - let next_id = owner.next_key(); - owner.add(TransformGroup { - name: self.name, - doc: self.doc, - isa_name: self.isa_name, - id: next_id, - chain_with: self.chain_with, - custom_legalizes: self.custom_legalizes, - transforms: self.transforms, - }) - } -} - -pub(crate) struct TransformGroups { - groups: PrimaryMap, -} - -impl TransformGroups { - pub fn new() -> Self { - Self { - groups: PrimaryMap::new(), - } - } - pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex { - for group in self.groups.values() { - assert!( - group.name != new_group.name, - "trying to insert {} for the second time", - new_group.name - ); - } - self.groups.push(new_group) - } - pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup { - &self.groups[id] - } - fn next_key(&self) -> TransformGroupIndex { - self.groups.next_key() - } - pub fn by_name(&self, name: &'static str) -> &TransformGroup { - for group in self.groups.values() { - if group.name == name { - return group; - } - } - panic!("transform group with name {} not found", name); - } -} - -#[test] -#[should_panic] -fn test_double_custom_legalization() { - use crate::cdsl::formats::InstructionFormatBuilder; - use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder}; - - let nullary = InstructionFormatBuilder::new("nullary").build(); - - let mut dummy_all = AllInstructions::new(); - let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all); - inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary)); - - let inst_group = inst_group.build(); - let dummy_inst = inst_group.by_name("dummy"); - - let mut transform_group = TransformGroupBuilder::new("test", "doc"); - transform_group.custom_legalize(&dummy_inst, "custom 1"); - transform_group.custom_legalize(&dummy_inst, "custom 2"); -} diff --git a/cranelift/codegen/meta/src/gen_legalizer.rs b/cranelift/codegen/meta/src/gen_legalizer.rs deleted file mode 100644 index 7b56b8db48..0000000000 --- a/cranelift/codegen/meta/src/gen_legalizer.rs +++ /dev/null @@ -1,734 +0,0 @@ -//! Generate transformations to legalize instructions without encodings. -use crate::cdsl::ast::{Def, DefPool, Expr, VarPool}; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::operands::Operand; -use crate::cdsl::type_inference::Constraint; -use crate::cdsl::typevar::{TypeSet, TypeVar}; -use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups}; - -use crate::error; -use crate::gen_inst::gen_typesets_table; -use crate::srcgen::Formatter; -use crate::unique_table::UniqueTable; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; - -/// Given a `Def` node, emit code that extracts all the instruction fields from -/// `pos.func.dfg[iref]`. -/// -/// Create local variables named after the `Var` instances in `node`. -/// -/// Also create a local variable named `predicate` with the value of the evaluated instruction -/// predicate, or `true` if the node has no predicate. 
-fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool { - let var_pool = &transform.var_pool; - let def_pool = &transform.def_pool; - - let def = def_pool.get(transform.src); - let apply = &def.apply; - let inst = &apply.inst; - let iform = &inst.format; - - fmt.comment(format!( - "Unwrap fields from instruction format {}", - def.to_comment_string(&transform.var_pool) - )); - - // Extract the Var arguments. - let arg_names = apply - .args - .iter() - .enumerate() - .filter(|(arg_num, _)| { - // Variable args are specially handled after extracting args. - !inst.operands_in[*arg_num].is_varargs() - }) - .map(|(arg_num, arg)| match &arg { - Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(), - Expr::Literal(_) => { - let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap(); - iform.imm_fields[n].member - } - }) - .collect::>() - .join(", "); - - // May we need "args" in the values consumed by predicates? - let emit_args = iform.num_value_operands >= 1 || iform.has_value_list; - - // We need a tuple: - // - if there's at least one value operand, then we emit a variable for the value, and the - // value list as args. - // - otherwise, if there's the count of immediate operands added to the presence of a value list exceeds one. - let need_tuple = if iform.num_value_operands >= 1 { - true - } else { - let mut imm_and_varargs = inst - .operands_in - .iter() - .filter(|op| op.is_immediate_or_entityref()) - .count(); - if iform.has_value_list { - imm_and_varargs += 1; - } - imm_and_varargs > 1 - }; - - let maybe_args = if emit_args { ", args" } else { "" }; - let defined_values = format!("{}{}", arg_names, maybe_args); - - let tuple_or_value = if need_tuple { - format!("({})", defined_values) - } else { - defined_values - }; - - fmtln!( - fmt, - "let {} = if let ir::InstructionData::{} {{", - tuple_or_value, - iform.name - ); - - fmt.indent(|fmt| { - // Fields are encoded directly. - for field in &iform.imm_fields { - fmtln!(fmt, "{},", field.member); - } - - if iform.has_value_list || iform.num_value_operands > 1 { - fmt.line("ref args,"); - } else if iform.num_value_operands == 1 { - fmt.line("arg,"); - } - - fmt.line(".."); - fmt.outdented_line("} = pos.func.dfg[inst] {"); - - if iform.has_value_list { - fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);"); - } else if iform.num_value_operands == 1 { - fmt.line("let args = [arg];") - } - - // Generate the values for the tuple. - let emit_one_value = - |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| { - let comma = if needs_comma { "," } else { "" }; - if op.is_immediate_or_entityref() { - let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap(); - fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma); - } else if op.is_value() { - let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap(); - fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n); - } else { - // This is a value list argument or a varargs. - assert!(iform.has_value_list || op.is_varargs()); - } - }; - - if need_tuple { - fmt.line("("); - fmt.indent(|fmt| { - for (op_num, op) in inst.operands_in.iter().enumerate() { - let needs_comma = emit_args || op_num + 1 < inst.operands_in.len(); - emit_one_value(fmt, needs_comma, op_num, op); - } - if emit_args { - fmt.line("args"); - } - }); - fmt.line(")"); - } else { - // Only one of these can be true at the same time, otherwise we'd need a tuple. 
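[Editor's sketch] The generator above leans on the meta crate's `Formatter` (`fmt.line`, `fmtln!`, `fmt.indent(|fmt| ...)`) to build nested Rust source. If that API is unfamiliar, the stripped-down stand-in below shows the shape of what those calls do; `Fmt` and its methods are invented for illustration, not the real `srcgen` types.

/// A tiny stand-in for a source-code formatter: it tracks an indentation
/// level and accumulates lines of generated Rust.
struct Fmt {
    level: usize,
    out: String,
}

impl Fmt {
    fn new() -> Self {
        Fmt { level: 0, out: String::new() }
    }

    fn line(&mut self, s: &str) {
        for _ in 0..self.level {
            self.out.push_str("    ");
        }
        self.out.push_str(s);
        self.out.push('\n');
    }

    /// Run `body` with the indentation level bumped by one, like the real
    /// formatter's `indent` closure.
    fn indent<F: FnOnce(&mut Fmt)>(&mut self, body: F) {
        self.level += 1;
        body(self);
        self.level -= 1;
    }
}

fn main() {
    let mut fmt = Fmt::new();
    fmt.line("match pos.func.dfg[inst].opcode() {");
    fmt.indent(|fmt| {
        fmt.line("ir::Opcode::Brz => {");
        fmt.indent(|fmt| fmt.line("return true;"));
        fmt.line("}");
        fmt.line("_ => {}");
    });
    fmt.line("}");
    print!("{}", fmt.out);
}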
- emit_one_value(fmt, false, 0, &inst.operands_in[0]); - if emit_args { - fmt.line("args"); - } - } - - fmt.outdented_line("} else {"); - fmt.line(r#"unreachable!("bad instruction format")"#); - }); - fmtln!(fmt, "};"); - fmt.empty_line(); - - assert_eq!(inst.operands_in.len(), apply.args.len()); - for (i, op) in inst.operands_in.iter().enumerate() { - if op.is_varargs() { - let name = &var_pool - .get(apply.args[i].maybe_var().expect("vararg without name")) - .name; - let n = inst - .imm_opnums - .iter() - .chain(inst.value_opnums.iter()) - .max() - .copied() - .unwrap_or(0); - fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n); - } - } - - for &op_num in &inst.value_opnums { - let arg = &apply.args[op_num]; - if let Some(var_index) = arg.maybe_var() { - let var = var_pool.get(var_index); - if var.has_free_typevar() { - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type({});", - var.name, - var.name - ); - } - } - } - - // If the definition creates results, detach the values and place them in locals. - let mut replace_inst = false; - if !def.defined_vars.is_empty() { - if def.defined_vars - == def_pool - .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) - .defined_vars - { - // Special case: The instruction replacing node defines the exact same values. - fmt.comment(format!( - "Results handled by {}.", - def_pool - .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) - .to_comment_string(var_pool) - )); - - fmt.line("let r = pos.func.dfg.inst_results(inst);"); - for (i, &var_index) in def.defined_vars.iter().enumerate() { - let var = var_pool.get(var_index); - fmtln!(fmt, "let {} = &r[{}];", var.name, i); - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type(*{});", - var.name, - var.name - ); - } - - replace_inst = true; - } else { - // Boring case: Detach the result values, capture them in locals. - for &var_index in &def.defined_vars { - fmtln!(fmt, "let {};", var_pool.get(var_index).name); - } - - fmt.line("{"); - fmt.indent(|fmt| { - fmt.line("let r = pos.func.dfg.inst_results(inst);"); - for i in 0..def.defined_vars.len() { - let var = var_pool.get(def.defined_vars[i]); - fmtln!(fmt, "{} = r[{}];", var.name, i); - } - }); - fmt.line("}"); - - for &var_index in &def.defined_vars { - let var = var_pool.get(var_index); - if var.has_free_typevar() { - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type({});", - var.name, - var.name - ); - } - } - } - } - replace_inst -} - -fn build_derived_expr(tv: &TypeVar) -> String { - let base = match &tv.base { - Some(base) => base, - None => { - assert!(tv.name.starts_with("typeof_")); - return format!("Some({})", tv.name); - } - }; - let base_expr = build_derived_expr(&base.type_var); - format!( - "{}.map(|t: crate::ir::Type| t.{}())", - base_expr, - base.derived_func.name() - ) -} - -/// Emit rust code for the given check. -/// -/// The emitted code is a statement redefining the `predicate` variable like this: -/// let predicate = predicate && ... 
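[Editor's sketch] `build_derived_expr` above turns a chain of derived typevars into a Rust expression that the generated legalizer evaluates at run time, wrapping each derivation in `.map(...)` so an unrepresentable type becomes `None`. A standalone version over a small invented `TypeExpr` type reproduces the same string shape.

/// A derived type expression: either a leaf variable name or a derivation
/// applied to another expression (a stand-in for derived `TypeVar`s).
enum TypeExpr {
    Var(&'static str),
    Derived(&'static str, Box<TypeExpr>),
}

/// Build the Rust expression the generated legalizer would evaluate,
/// mirroring the shape produced by the removed `build_derived_expr`.
fn build_expr(e: &TypeExpr) -> String {
    match e {
        TypeExpr::Var(name) => format!("Some({})", name),
        TypeExpr::Derived(func, base) => {
            format!("{}.map(|t: crate::ir::Type| t.{}())", build_expr(base), func)
        }
    }
}

fn main() {
    let e = TypeExpr::Derived("half_width", Box::new(TypeExpr::Var("typeof_x")));
    assert_eq!(
        build_expr(&e),
        "Some(typeof_x).map(|t: crate::ir::Type| t.half_width())"
    );
}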
-fn emit_runtime_typecheck<'a>( - constraint: &'a Constraint, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - match constraint { - Constraint::InTypeset(tv, ts) => { - let ts_index = type_sets.add(&ts); - fmt.comment(format!( - "{} must belong to {:?}", - tv.name, - type_sets.get(ts_index) - )); - fmtln!( - fmt, - "let predicate = predicate && TYPE_SETS[{}].contains({});", - ts_index, - tv.name - ); - } - Constraint::Eq(tv1, tv2) => { - fmtln!( - fmt, - "let predicate = predicate && match ({}, {}) {{", - build_derived_expr(tv1), - build_derived_expr(tv2) - ); - fmt.indent(|fmt| { - fmt.line("(Some(a), Some(b)) => a == b,"); - fmt.comment("On overflow, constraint doesn\'t apply"); - fmt.line("_ => false,"); - }); - fmtln!(fmt, "};"); - } - Constraint::WiderOrEq(tv1, tv2) => { - fmtln!( - fmt, - "let predicate = predicate && match ({}, {}) {{", - build_derived_expr(tv1), - build_derived_expr(tv2) - ); - fmt.indent(|fmt| { - fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),"); - fmt.comment("On overflow, constraint doesn\'t apply"); - fmt.line("_ => false,"); - }); - fmtln!(fmt, "};"); - } - } -} - -/// Determine if `node` represents one of the value splitting instructions: `isplit` or `vsplit. -/// These instructions are lowered specially by the `legalize::split` module. -fn is_value_split(def: &Def) -> bool { - let name = &def.apply.inst.name; - name == "isplit" || name == "vsplit" -} - -fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) { - let defined_vars = { - let vars = def - .defined_vars - .iter() - .map(|&var_index| var_pool.get(var_index).name.as_ref()) - .collect::>(); - if vars.len() == 1 { - vars[0].to_string() - } else { - format!("({})", vars.join(", ")) - } - }; - - if is_value_split(def) { - // Split instructions are not emitted with the builder, but by calling special functions in - // the `legalizer::split` module. These functions will eliminate concat-split patterns. - fmt.line("let curpos = pos.position();"); - fmt.line("let srcloc = pos.srcloc();"); - fmtln!( - fmt, - "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});", - defined_vars, - def.apply.inst.snake_name(), - def.apply.args[0].to_rust_code(var_pool) - ); - return; - } - - if def.defined_vars.is_empty() { - // This node doesn't define any values, so just insert the new instruction. - fmtln!( - fmt, - "pos.ins().{};", - def.apply.rust_builder(&def.defined_vars, var_pool) - ); - return; - } - - if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def { - if def.defined_vars == def_pool.get(src_def0).defined_vars { - // The replacement instruction defines the exact same values as the source pattern. - // Unwrapping would have left the results intact. Replace the whole instruction. - fmtln!( - fmt, - "let {} = pos.func.dfg.replace(inst).{};", - defined_vars, - def.apply.rust_builder(&def.defined_vars, var_pool) - ); - - // We need to bump the cursor so following instructions are inserted *after* the - // replaced instruction. - fmt.line("if pos.current_inst() == Some(inst) {"); - fmt.indent(|fmt| { - fmt.line("pos.next_inst();"); - }); - fmt.line("}"); - return; - } - } - - // Insert a new instruction. - let mut builder = format!("let {} = pos.ins()", defined_vars); - - if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() { - // Reuse the single source result value. 
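[Editor's sketch] Each runtime check emitted by `emit_runtime_typecheck` rebinds `predicate`, so several constraints accumulate into one conjunction that later guards the expansion. A small sketch of that chaining follows; the check strings are placeholders rather than output generated from real constraints.

/// Chain runtime checks into successive rebindings of `predicate`, mirroring
/// the statements the removed generator prints.
fn emit_predicate_chain(checks: &[&str]) -> String {
    let mut out = String::from("let predicate = true;\n");
    for check in checks {
        out.push_str(&format!("let predicate = predicate && {};\n", check));
    }
    out
}

fn main() {
    let code = emit_predicate_chain(&[
        "TYPE_SETS[0].contains(typeof_x)",
        "match (Some(typeof_x), Some(typeof_y)) { (Some(a), Some(b)) => a == b, _ => false }",
    ]);
    print!("{}", code);
    assert_eq!(code.matches("let predicate").count(), 3);
}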
- builder = format!( - "{}.with_result({})", - builder, - var_pool.get(def.defined_vars[0]).to_rust_code() - ); - } else if def - .defined_vars - .iter() - .any(|&var_index| var_pool.get(var_index).is_output()) - { - // There are more than one output values that can be reused. - let array = def - .defined_vars - .iter() - .map(|&var_index| { - let var = var_pool.get(var_index); - if var.is_output() { - format!("Some({})", var.name) - } else { - "None".into() - } - }) - .collect::>() - .join(", "); - builder = format!("{}.with_results([{}])", builder, array); - } - - fmtln!( - fmt, - "{}.{};", - builder, - def.apply.rust_builder(&def.defined_vars, var_pool) - ); -} - -/// Emit code for `transform`, assuming that the opcode of transform's root instruction -/// has already been matched. -/// -/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`. -/// `dfg: DataFlowGraph` is available and mutable. -fn gen_transform<'a>( - replace_inst: bool, - transform: &'a Transform, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - // Evaluate the instruction predicate if any. - let apply = &transform.def_pool.get(transform.src).apply; - - let inst_predicate = apply - .inst_predicate_with_ctrl_typevar(&transform.var_pool) - .rust_predicate("pos.func"); - - let has_extra_constraints = !transform.type_env.constraints.is_empty(); - if has_extra_constraints { - // Extra constraints rely on the predicate being a variable that we can rebind as we add - // more constraint predicates. - if let Some(pred) = &inst_predicate { - fmt.multi_line(&format!("let predicate = {};", pred)); - } else { - fmt.line("let predicate = true;"); - } - } - - // Emit any runtime checks; these will rebind `predicate` emitted right above. - for constraint in &transform.type_env.constraints { - emit_runtime_typecheck(constraint, type_sets, fmt); - } - - let do_expand = |fmt: &mut Formatter| { - // Emit any constants that must be created before use. - for (name, value) in transform.const_pool.iter() { - fmtln!( - fmt, - "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());", - name, - value - ); - } - - // If we are adding some blocks, we need to recall the original block, such that we can - // recompute it. - if !transform.block_pool.is_empty() { - fmt.line("let orig_block = pos.current_block().unwrap();"); - } - - // If we're going to delete `inst`, we need to detach its results first so they can be - // reattached during pattern expansion. - if !replace_inst { - fmt.line("pos.func.dfg.clear_results(inst);"); - } - - // Emit new block creation. - for block in &transform.block_pool { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name); - } - - // Emit the destination pattern. - for &def_index in &transform.dst { - if let Some(block) = transform.block_pool.get(def_index) { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "pos.insert_block({});", var.name); - } - emit_dst_inst( - transform.def_pool.get(def_index), - &transform.def_pool, - &transform.var_pool, - fmt, - ); - } - - // Insert a new block after the last instruction, if needed. - let def_next_index = transform.def_pool.next_index(); - if let Some(block) = transform.block_pool.get(def_next_index) { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "pos.insert_block({});", var.name); - } - - // Delete the original instruction if we didn't have an opportunity to replace it. 
- if !replace_inst { - fmt.line("let removed = pos.remove_inst();"); - fmt.line("debug_assert_eq!(removed, inst);"); - } - - if transform.block_pool.is_empty() { - if transform.def_pool.get(transform.src).apply.inst.is_branch { - // A branch might have been legalized into multiple branches, so we need to recompute - // the cfg. - fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());"); - } - } else { - // Update CFG for the new blocks. - fmt.line("cfg.recompute_block(pos.func, orig_block);"); - for block in &transform.block_pool { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name); - } - } - - fmt.line("return true;"); - }; - - // Guard the actual expansion by `predicate`. - if has_extra_constraints { - fmt.line("if predicate {"); - fmt.indent(|fmt| { - do_expand(fmt); - }); - fmt.line("}"); - } else if let Some(pred) = &inst_predicate { - fmt.multi_line(&format!("if {} {{", pred)); - fmt.indent(|fmt| { - do_expand(fmt); - }); - fmt.line("}"); - } else { - // Unconditional transform (there was no predicate), just emit it. - do_expand(fmt); - } -} - -fn gen_transform_group<'a>( - group: &'a TransformGroup, - transform_groups: &TransformGroups, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - fmt.doc_comment(group.doc); - fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]"); - - // Function arguments. - fmtln!(fmt, "pub fn {}(", group.name); - fmt.indent(|fmt| { - fmt.line("inst: crate::ir::Inst,"); - fmt.line("func: &mut crate::ir::Function,"); - fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,"); - fmt.line("isa: &dyn crate::isa::TargetIsa,"); - }); - fmtln!(fmt, ") -> bool {"); - - // Function body. - fmt.indent(|fmt| { - fmt.line("use crate::ir::InstBuilder;"); - fmt.line("use crate::cursor::{Cursor, FuncCursor};"); - fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);"); - fmt.line("pos.use_srcloc(inst);"); - - // Group the transforms by opcode so we can generate a big switch. - // Preserve ordering. - let mut inst_to_transforms = HashMap::new(); - for transform in &group.transforms { - let def_index = transform.src; - let inst = &transform.def_pool.get(def_index).apply.inst; - inst_to_transforms - .entry(inst.camel_name.clone()) - .or_insert_with(Vec::new) - .push(transform); - } - - let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys()); - sorted_inst_names.sort(); - - fmt.line("{"); - fmt.indent(|fmt| { - fmt.line("match pos.func.dfg[inst].opcode() {"); - fmt.indent(|fmt| { - for camel_name in sorted_inst_names { - fmtln!(fmt, "ir::Opcode::{} => {{", camel_name); - fmt.indent(|fmt| { - let transforms = inst_to_transforms.get(camel_name).unwrap(); - - // Unwrap the source instruction, create local variables for the input variables. - let replace_inst = unwrap_inst(&transforms[0], fmt); - fmt.empty_line(); - - for (i, transform) in transforms.iter().enumerate() { - if i > 0 { - fmt.empty_line(); - } - gen_transform(replace_inst, transform, type_sets, fmt); - } - }); - fmtln!(fmt, "}"); - fmt.empty_line(); - } - - // Emit the custom transforms. The Rust compiler will complain about any overlap with - // the normal transforms. 
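[Editor's sketch] `gen_transform_group` above buckets the group's transforms by the opcode of their root instruction and emits one `match` arm per opcode, sorting the opcode names so the generated file is deterministic. The sketch below performs the same bucketing and emission over plain strings standing in for transforms and generated bodies.

use std::collections::HashMap;

/// Group legalization patterns by opcode and emit one match arm per opcode,
/// in sorted order for deterministic output.
fn gen_match(transforms: &[(&str, &str)]) -> String {
    let mut by_opcode: HashMap<&str, Vec<&str>> = HashMap::new();
    for &(opcode, body) in transforms {
        by_opcode.entry(opcode).or_insert_with(Vec::new).push(body);
    }

    let mut opcodes: Vec<&&str> = by_opcode.keys().collect();
    opcodes.sort();

    let mut out = String::from("match pos.func.dfg[inst].opcode() {\n");
    for opcode in opcodes {
        out.push_str(&format!("    ir::Opcode::{} => {{\n", opcode));
        for body in &by_opcode[*opcode] {
            out.push_str(&format!("        {}\n", body));
        }
        out.push_str("    }\n");
    }
    out.push_str("    _ => {}\n}\n");
    out
}

fn main() {
    let code = gen_match(&[
        ("Popcnt", "// expand popcnt"),
        ("Brz", "// narrow brz"),
        ("Brz", "// second brz pattern"),
    ]);
    print!("{}", code);
    assert!(code.find("Brz").unwrap() < code.find("Popcnt").unwrap());
}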
- let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); - sorted_custom_legalizes.sort(); - for (inst_camel_name, func_name) in sorted_custom_legalizes { - fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); - fmt.indent(|fmt| { - fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name); - fmt.line("return true;"); - }); - fmtln!(fmt, "}"); - fmt.empty_line(); - } - - // We'll assume there are uncovered opcodes. - fmt.line("_ => {},"); - }); - fmt.line("}"); - }); - fmt.line("}"); - - // If we fall through, nothing was expanded; call the chain if any. - match &group.chain_with { - Some(group_id) => fmtln!( - fmt, - "{}(inst, func, cfg, isa)", - transform_groups.get(*group_id).rust_name() - ), - None => fmt.line("false"), - }; - }); - fmtln!(fmt, "}"); - fmt.empty_line(); -} - -/// Generate legalization functions for `isa` and add any shared `TransformGroup`s -/// encountered to `shared_groups`. -/// -/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables. -fn gen_isa( - isa: &TargetIsa, - transform_groups: &TransformGroups, - shared_group_names: &mut HashSet<&'static str>, - fmt: &mut Formatter, -) { - let mut type_sets = UniqueTable::new(); - for group_index in isa.transitive_transform_groups(transform_groups) { - let group = transform_groups.get(group_index); - match group.isa_name { - Some(isa_name) => { - assert!( - isa_name == isa.name, - "ISA-specific legalizations must be used by the same ISA" - ); - gen_transform_group(group, transform_groups, &mut type_sets, fmt); - } - None => { - shared_group_names.insert(group.name); - } - } - } - - gen_typesets_table(&type_sets, fmt); - - let direct_groups = isa.direct_transform_groups(); - fmtln!( - fmt, - "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [", - direct_groups.len() - ); - fmt.indent(|fmt| { - for &group_index in direct_groups { - fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); - } - }); - fmtln!(fmt, "];"); -} - -/// Generate the legalizer files. -pub(crate) fn generate( - isas: &[TargetIsa], - transform_groups: &TransformGroups, - extra_legalization_groups: &[&'static str], - filename_prefix: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut shared_group_names = HashSet::new(); - - for isa in isas { - let mut fmt = Formatter::new(); - gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt); - fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; - } - - // Add extra legalization groups that were explicitly requested. - for group in extra_legalization_groups { - shared_group_names.insert(group); - } - - // Generate shared legalize groups. - let mut fmt = Formatter::new(); - // Generate shared legalize groups. 
- let mut type_sets = UniqueTable::new(); - let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); - sorted_shared_group_names.sort(); - for group_name in &sorted_shared_group_names { - let group = transform_groups.by_name(group_name); - gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt); - } - gen_typesets_table(&type_sets, &mut fmt); - fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?; - - Ok(()) -} diff --git a/cranelift/codegen/meta/src/isa/arm32/mod.rs b/cranelift/codegen/meta/src/isa/arm32/mod.rs index 2dc58e4053..f524a87ea7 100644 --- a/cranelift/codegen/meta/src/isa/arm32/mod.rs +++ b/cranelift/codegen/meta/src/isa/arm32/mod.rs @@ -1,6 +1,4 @@ -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; @@ -52,20 +50,5 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = define_regs(); - let cpu_modes = vec![]; - - // TODO implement arm32 recipes. - let recipes = Recipes::new(); - - // TODO implement arm32 encodings and predicates. - let encodings_predicates = InstructionPredicateMap::new(); - - TargetIsa::new( - "arm32", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) + TargetIsa::new("arm32", settings, regs) } diff --git a/cranelift/codegen/meta/src/isa/arm64/mod.rs b/cranelift/codegen/meta/src/isa/arm64/mod.rs index 3ae57fbb62..a8920f703d 100644 --- a/cranelift/codegen/meta/src/isa/arm64/mod.rs +++ b/cranelift/codegen/meta/src/isa/arm64/mod.rs @@ -1,6 +1,4 @@ -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; @@ -51,20 +49,5 @@ pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = define_registers(); - let cpu_modes = vec![]; - - // TODO implement arm64 recipes. - let recipes = Recipes::new(); - - // TODO implement arm64 encodings and predicates. 
- let encodings_predicates = InstructionPredicateMap::new(); - - TargetIsa::new( - "arm64", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) + TargetIsa::new("arm64", settings, regs) } diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x/mod.rs index 97a5947080..a4fb05a9f5 100644 --- a/cranelift/codegen/meta/src/isa/s390x/mod.rs +++ b/cranelift/codegen/meta/src/isa/s390x/mod.rs @@ -1,6 +1,4 @@ -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::IsaRegsBuilder; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; @@ -46,17 +44,6 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = IsaRegsBuilder::new().build(); - let recipes = Recipes::new(); - let encodings_predicates = InstructionPredicateMap::new(); - let cpu_modes = vec![]; - - TargetIsa::new( - "s390x", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) + TargetIsa::new("s390x", settings, regs) } diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 7c3e4c6877..e21bfe1485 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,6 +1,4 @@ -use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::IsaRegsBuilder; use crate::shared::Definitions as SharedDefinitions; @@ -10,16 +8,5 @@ pub(crate) mod settings; pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); - let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); - - let cpu_modes = vec![]; - - TargetIsa::new( - "x86", - settings, - IsaRegsBuilder::new().build(), - Recipes::new(), - cpu_modes, - InstructionPredicateMap::new(), - ) + TargetIsa::new("x86", settings, IsaRegsBuilder::new().build()) } diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index bfa1cd0056..d8972702a3 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -8,7 +8,6 @@ pub mod error; pub mod isa; mod gen_inst; -mod gen_legalizer; mod gen_registers; mod gen_settings; mod gen_types; @@ -55,21 +54,6 @@ pub fn generate( &out_dir, )?; - let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() { - // The new backend only requires the "expand" legalization group. 
- &["expand"] - } else { - &[] - }; - - gen_legalizer::generate( - &target_isas, - &shared_defs.transform_groups, - extra_legalization_groups, - "legalize", - &out_dir, - )?; - for isa in target_isas { gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?; diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 0565645d4d..d869f8629c 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1,7 +1,7 @@ #![allow(non_snake_case)] use crate::cdsl::instructions::{ - AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, + AllInstructions, InstructionBuilder as Inst, InstructionGroupBuilder, }; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint::WiderOrEq; @@ -767,7 +767,7 @@ pub(crate) fn define( formats: &Formats, imm: &Immediates, entities: &EntityRefs, -) -> InstructionGroup { +) { let mut ig = InstructionGroupBuilder::new(all_instructions); define_control_flow(&mut ig, formats, imm, entities); @@ -4647,6 +4647,4 @@ pub(crate) fn define( ) .other_side_effects(true), ); - - ig.build() } diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs deleted file mode 100644 index 9a0d6cffde..0000000000 --- a/cranelift/codegen/meta/src/shared/legalize.rs +++ /dev/null @@ -1,1087 +0,0 @@ -use crate::cdsl::ast::{var, ExprBuilder, Literal}; -use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup}; -use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; - -use crate::shared::immediates::Immediates; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I128, I16, I32, I64, I8}; -use cranelift_codegen_shared::condcodes::{CondCode, IntCC}; - -#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)] -pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { - let mut narrow = TransformGroupBuilder::new( - "narrow", - r#" - Legalize instructions by narrowing. - - The transformations in the 'narrow' group work by expressing - instructions in terms of smaller types. Operations on vector types are - expressed in terms of vector types with fewer lanes, and integer - operations are expressed in terms of smaller integer types. - "#, - ); - - let mut widen = TransformGroupBuilder::new( - "widen", - r#" - Legalize instructions by widening. - - The transformations in the 'widen' group work by expressing - instructions in terms of larger types. - "#, - ); - - let mut expand = TransformGroupBuilder::new( - "expand", - r#" - Legalize instructions by expansion. - - Rewrite instructions in terms of other instructions, generally - operating on the same types as the original instructions. - "#, - ); - - // List of instructions. 
- let band = insts.by_name("band"); - let band_imm = insts.by_name("band_imm"); - let band_not = insts.by_name("band_not"); - let bint = insts.by_name("bint"); - let bitrev = insts.by_name("bitrev"); - let bnot = insts.by_name("bnot"); - let bor = insts.by_name("bor"); - let bor_imm = insts.by_name("bor_imm"); - let bor_not = insts.by_name("bor_not"); - let brnz = insts.by_name("brnz"); - let brz = insts.by_name("brz"); - let br_icmp = insts.by_name("br_icmp"); - let br_table = insts.by_name("br_table"); - let bxor = insts.by_name("bxor"); - let bxor_imm = insts.by_name("bxor_imm"); - let bxor_not = insts.by_name("bxor_not"); - let cls = insts.by_name("cls"); - let clz = insts.by_name("clz"); - let ctz = insts.by_name("ctz"); - let copy = insts.by_name("copy"); - let fabs = insts.by_name("fabs"); - let f32const = insts.by_name("f32const"); - let f64const = insts.by_name("f64const"); - let fcopysign = insts.by_name("fcopysign"); - let fcvt_from_sint = insts.by_name("fcvt_from_sint"); - let fneg = insts.by_name("fneg"); - let iadd = insts.by_name("iadd"); - let iadd_cin = insts.by_name("iadd_cin"); - let iadd_cout = insts.by_name("iadd_cout"); - let iadd_carry = insts.by_name("iadd_carry"); - let iadd_ifcin = insts.by_name("iadd_ifcin"); - let iadd_ifcout = insts.by_name("iadd_ifcout"); - let iadd_imm = insts.by_name("iadd_imm"); - let icmp = insts.by_name("icmp"); - let icmp_imm = insts.by_name("icmp_imm"); - let iconcat = insts.by_name("iconcat"); - let iconst = insts.by_name("iconst"); - let ifcmp = insts.by_name("ifcmp"); - let ifcmp_imm = insts.by_name("ifcmp_imm"); - let imul = insts.by_name("imul"); - let imul_imm = insts.by_name("imul_imm"); - let ireduce = insts.by_name("ireduce"); - let irsub_imm = insts.by_name("irsub_imm"); - let ishl = insts.by_name("ishl"); - let ishl_imm = insts.by_name("ishl_imm"); - let isplit = insts.by_name("isplit"); - let istore8 = insts.by_name("istore8"); - let istore16 = insts.by_name("istore16"); - let isub = insts.by_name("isub"); - let isub_bin = insts.by_name("isub_bin"); - let isub_bout = insts.by_name("isub_bout"); - let isub_borrow = insts.by_name("isub_borrow"); - let isub_ifbin = insts.by_name("isub_ifbin"); - let isub_ifbout = insts.by_name("isub_ifbout"); - let jump = insts.by_name("jump"); - let load = insts.by_name("load"); - let popcnt = insts.by_name("popcnt"); - let resumable_trapnz = insts.by_name("resumable_trapnz"); - let rotl = insts.by_name("rotl"); - let rotl_imm = insts.by_name("rotl_imm"); - let rotr = insts.by_name("rotr"); - let rotr_imm = insts.by_name("rotr_imm"); - let sdiv = insts.by_name("sdiv"); - let sdiv_imm = insts.by_name("sdiv_imm"); - let select = insts.by_name("select"); - let sextend = insts.by_name("sextend"); - let sshr = insts.by_name("sshr"); - let sshr_imm = insts.by_name("sshr_imm"); - let srem = insts.by_name("srem"); - let srem_imm = insts.by_name("srem_imm"); - let store = insts.by_name("store"); - let udiv = insts.by_name("udiv"); - let udiv_imm = insts.by_name("udiv_imm"); - let uextend = insts.by_name("uextend"); - let uload8 = insts.by_name("uload8"); - let uload16 = insts.by_name("uload16"); - let umulhi = insts.by_name("umulhi"); - let ushr = insts.by_name("ushr"); - let ushr_imm = insts.by_name("ushr_imm"); - let urem = insts.by_name("urem"); - let urem_imm = insts.by_name("urem_imm"); - let trapif = insts.by_name("trapif"); - let trapnz = insts.by_name("trapnz"); - let trapz = insts.by_name("trapz"); - - // Custom expansions for memory objects. 
- expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); - expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); - expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); - - // Custom expansions for calls. - expand.custom_legalize(insts.by_name("call"), "expand_call"); - - // Custom expansions that need to change the CFG. - // TODO: Add sufficient XForm syntax that we don't need to hand-code these. - expand.custom_legalize(trapz, "expand_cond_trap"); - expand.custom_legalize(trapnz, "expand_cond_trap"); - expand.custom_legalize(resumable_trapnz, "expand_cond_trap"); - expand.custom_legalize(br_table, "expand_br_table"); - expand.custom_legalize(select, "expand_select"); - widen.custom_legalize(select, "expand_select"); // small ints - - // Custom expansions for floating point constants. - // These expansions require bit-casting or creating constant pool entries. - expand.custom_legalize(f32const, "expand_fconst"); - expand.custom_legalize(f64const, "expand_fconst"); - - // Custom expansions for stack memory accesses. - expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); - expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); - - // Custom expansions for small stack memory acccess. - widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); - widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); - - // List of variables to reuse in patterns. - let x = var("x"); - let y = var("y"); - let z = var("z"); - let a = var("a"); - let a1 = var("a1"); - let a2 = var("a2"); - let a3 = var("a3"); - let a4 = var("a4"); - let b = var("b"); - let b1 = var("b1"); - let b2 = var("b2"); - let b3 = var("b3"); - let b4 = var("b4"); - let b_in = var("b_in"); - let b_int = var("b_int"); - let c = var("c"); - let c1 = var("c1"); - let c2 = var("c2"); - let c3 = var("c3"); - let c4 = var("c4"); - let c_in = var("c_in"); - let c_int = var("c_int"); - let d = var("d"); - let d1 = var("d1"); - let d2 = var("d2"); - let d3 = var("d3"); - let d4 = var("d4"); - let e = var("e"); - let e1 = var("e1"); - let e2 = var("e2"); - let e3 = var("e3"); - let e4 = var("e4"); - let f = var("f"); - let f1 = var("f1"); - let f2 = var("f2"); - let xl = var("xl"); - let xh = var("xh"); - let yl = var("yl"); - let yh = var("yh"); - let al = var("al"); - let ah = var("ah"); - let cc = var("cc"); - let block = var("block"); - let ptr = var("ptr"); - let flags = var("flags"); - let offset = var("off"); - let vararg = var("vararg"); - - narrow.custom_legalize(load, "narrow_load"); - narrow.custom_legalize(store, "narrow_store"); - - // iconst.i64 can't be legalized in the meta langage (because integer literals can't be - // embedded as part of arguments), so use a custom legalization for now. 
- narrow.custom_legalize(iconst, "narrow_iconst"); - - for &(ty, ty_half) in &[(I128, I64), (I64, I32)] { - let inst = uextend.bind(ty).bind(ty_half); - narrow.legalize( - def!(a = inst(x)), - vec![ - def!(ah = iconst(Literal::constant(&imm.imm64, 0))), - def!(a = iconcat(x, ah)), - ], - ); - } - - for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] { - let inst = sextend.bind(ty).bind(ty_half); - narrow.legalize( - def!(a = inst(x)), - vec![ - def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number - def!(a = iconcat(x, ah)), - ], - ); - } - - for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { - narrow.legalize( - def!(a = bin_op(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(al = bin_op(xl, yl)), - def!(ah = bin_op(xh, yh)), - def!(a = iconcat(al, ah)), - ], - ); - } - - narrow.legalize( - def!(a = bnot(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(al = bnot(xl)), - def!(ah = bnot(xh)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow.legalize( - def!(a = select(c, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(al = select(c, xl, yl)), - def!(ah = select(c, xh, yh)), - def!(a = iconcat(al, ah)), - ], - ); - - for &ty in &[I128, I64] { - let block = var("block"); - let block1 = var("block1"); - let block2 = var("block2"); - - narrow.legalize( - def!(brz.ty(x, block, vararg)), - vec![ - def!((xl, xh) = isplit(x)), - def!( - a = icmp_imm( - Literal::enumerator_for(&imm.intcc, "eq"), - xl, - Literal::constant(&imm.imm64, 0) - ) - ), - def!( - b = icmp_imm( - Literal::enumerator_for(&imm.intcc, "eq"), - xh, - Literal::constant(&imm.imm64, 0) - ) - ), - def!(c = band(a, b)), - def!(brnz(c, block, vararg)), - ], - ); - - narrow.legalize( - def!(brnz.ty(x, block1, vararg)), - vec![ - def!((xl, xh) = isplit(x)), - def!(brnz(xl, block1, vararg)), - def!(jump(block2, Literal::empty_vararg())), - block!(block2), - def!(brnz(xh, block1, vararg)), - ], - ); - } - - narrow.legalize( - def!(a = popcnt.I128(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(e1 = popcnt(xl)), - def!(e2 = popcnt(xh)), - def!(e3 = iadd(e1, e2)), - def!(a = uextend(e3)), - ], - ); - - // TODO(ryzokuken): benchmark this and decide if branching is a faster - // approach than evaluating boolean expressions. 
- - narrow.custom_legalize(icmp_imm, "narrow_icmp_imm"); - - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); - for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] { - narrow.legalize( - def!(b = icmp.int_ty(intcc_eq, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)), - def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)), - def!(b = band(b1, b2)), - ], - ); - - narrow.legalize( - def!(b = icmp.int_ty(intcc_ne, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)), - def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)), - def!(b = bor(b1, b2)), - ], - ); - - use IntCC::*; - for cc in &[ - SignedGreaterThan, - SignedGreaterThanOrEqual, - SignedLessThan, - SignedLessThanOrEqual, - UnsignedGreaterThan, - UnsignedGreaterThanOrEqual, - UnsignedLessThan, - UnsignedLessThanOrEqual, - ] { - let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str()); - let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str()); - let cc2 = - Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str()); - let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str()); - narrow.legalize( - def!(b = icmp.int_ty(intcc_cc, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - // X = cc1 || (!cc2 && cc3) - def!(b1 = icmp.int_ty_half(cc1, xh, yh)), - def!(b2 = icmp.int_ty_half(cc2, xh, yh)), - def!(b3 = icmp.int_ty_half(cc3, xl, yl)), - def!(c1 = bnot(b2)), - def!(c2 = band(c1, b3)), - def!(b = bor(b1, c2)), - ], - ); - } - } - - // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider have a - // separate legalization for x86. - for &ty in &[I64, I128] { - narrow.legalize( - def!(a = imul.ty(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(a1 = imul(xh, yl)), - def!(a2 = imul(xl, yh)), - def!(a3 = iadd(a1, a2)), - def!(a4 = umulhi(xl, yl)), - def!(ah = iadd(a3, a4)), - def!(al = imul(xl, yl)), - def!(a = iconcat(al, ah)), - ], - ); - } - - let zero = Literal::constant(&imm.imm64, 0); - narrow.legalize( - def!(a = iadd_imm.I128(x, c)), - vec![ - def!(yh = iconst.I64(zero)), - def!(yl = iconst.I64(c)), - def!(y = iconcat.I64(yh, yl)), - def!(a = iadd(x, y)), - ], - ); - - // Widen instructions with one input operand. - for &op in &[bnot, popcnt] { - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = op.int_ty(b)), - vec![ - def!(x = uextend.I32(b)), - def!(z = op.I32(x)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - } - - // Widen instructions with two input operands. - let mut widen_two_arg = |signed: bool, op: &Instruction| { - for &int_ty in &[I8, I16] { - let sign_ext_op = if signed { sextend } else { uextend }; - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sign_ext_op.I32(b)), - def!(y = sign_ext_op.I32(c)), - def!(z = op.I32(x, y)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - }; - - for bin_op in &[ - iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, - ] { - widen_two_arg(false, bin_op); - } - for bin_op in &[sdiv, srem] { - widen_two_arg(true, bin_op); - } - - // Widen instructions using immediate operands. 
- let mut widen_imm = |signed: bool, op: &Instruction| { - for &int_ty in &[I8, I16] { - let sign_ext_op = if signed { sextend } else { uextend }; - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sign_ext_op.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - }; - - for bin_op in &[ - iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, - ] { - widen_imm(false, bin_op); - } - for bin_op in &[sdiv_imm, srem_imm] { - widen_imm(true, bin_op); - } - - for &(int_ty, num) in &[(I8, 24), (I16, 16)] { - let imm = Literal::constant(&imm.imm64, -num); - - widen.legalize( - def!(a = clz.int_ty(b)), - vec![ - def!(c = uextend.I32(b)), - def!(d = clz.I32(c)), - def!(e = iadd_imm(d, imm)), - def!(a = ireduce.int_ty(e)), - ], - ); - - widen.legalize( - def!(a = cls.int_ty(b)), - vec![ - def!(c = sextend.I32(b)), - def!(d = cls.I32(c)), - def!(e = iadd_imm(d, imm)), - def!(a = ireduce.int_ty(e)), - ], - ); - } - - for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { - let num = Literal::constant(&imm.imm64, num); - widen.legalize( - def!(a = ctz.int_ty(b)), - vec![ - def!(c = uextend.I32(b)), - // When `b` is zero, returns the size of x in bits. - def!(d = bor_imm(c, num)), - def!(e = ctz.I32(d)), - def!(a = ireduce.int_ty(e)), - ], - ); - } - - // iconst - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = iconst.int_ty(b)), - vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], - ); - } - - for &extend_op in &[uextend, sextend] { - // The sign extension operators have two typevars: the result has one and controls the - // instruction, then the input has one. - let bound = extend_op.bind(I16).bind(I8); - widen.legalize( - def!(a = bound(b)), - vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], - ); - } - - widen.legalize( - def!(store.I8(flags, a, ptr, offset)), - vec![ - def!(b = uextend.I32(a)), - def!(istore8(flags, b, ptr, offset)), - ], - ); - - widen.legalize( - def!(store.I16(flags, a, ptr, offset)), - vec![ - def!(b = uextend.I32(a)), - def!(istore16(flags, b, ptr, offset)), - ], - ); - - widen.legalize( - def!(a = load.I8(flags, ptr, offset)), - vec![ - def!(b = uload8.I32(flags, ptr, offset)), - def!(a = ireduce(b)), - ], - ); - - widen.legalize( - def!(a = load.I16(flags, ptr, offset)), - vec![ - def!(b = uload16.I32(flags, ptr, offset)), - def!(a = ireduce(b)), - ], - ); - - for &int_ty in &[I8, I16] { - widen.legalize( - def!(br_table.int_ty(x, y, z)), - vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], - ); - } - - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = bint.int_ty(b)), - vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], - ); - } - - for &int_ty in &[I8, I16] { - for &op in &[ishl, ishl_imm, ushr, ushr_imm] { - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = uextend.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - - for &op in &[sshr, sshr_imm] { - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sextend.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - - for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { - let w_cc = Literal::enumerator_for(&imm.intcc, cc); - widen.legalize( - def!(a = icmp_imm.int_ty(w_cc, b, c)), - vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], - ); - widen.legalize( - def!(a = icmp.int_ty(w_cc, b, c)), - vec![ - def!(x = uextend.I32(b)), - def!(y = uextend.I32(c)), - def!(a = icmp.I32(w_cc, x, y)), - ], - ); - } - - for cc in 
&["sgt", "slt", "sge", "sle"] { - let w_cc = Literal::enumerator_for(&imm.intcc, cc); - widen.legalize( - def!(a = icmp_imm.int_ty(w_cc, b, c)), - vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], - ); - - widen.legalize( - def!(a = icmp.int_ty(w_cc, b, c)), - vec![ - def!(x = sextend.I32(b)), - def!(y = sextend.I32(c)), - def!(a = icmp(w_cc, x, y)), - ], - ); - } - } - - for &ty in &[I8, I16] { - widen.legalize( - def!(brz.ty(x, block, vararg)), - vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))], - ); - - widen.legalize( - def!(brnz.ty(x, block, vararg)), - vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))], - ); - } - - for &(ty_half, ty) in &[(I64, I128), (I32, I64)] { - let inst = ireduce.bind(ty_half).bind(ty); - expand.legalize( - def!(a = inst(x)), - vec![def!((b, c) = isplit(x)), def!(a = copy(b))], - ); - } - - // Expand integer operations with carry for RISC architectures that don't have - // the flags. - let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); - expand.legalize( - def!((a, c) = iadd_cout(x, y)), - vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], - ); - - let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); - expand.legalize( - def!((a, b) = isub_bout(x, y)), - vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], - ); - - expand.legalize( - def!(a = iadd_cin(x, y, c)), - vec![ - def!(a1 = iadd(x, y)), - def!(c_int = bint(c)), - def!(a = iadd(a1, c_int)), - ], - ); - - expand.legalize( - def!(a = isub_bin(x, y, b)), - vec![ - def!(a1 = isub(x, y)), - def!(b_int = bint(b)), - def!(a = isub(a1, b_int)), - ], - ); - - expand.legalize( - def!((a, c) = iadd_carry(x, y, c_in)), - vec![ - def!((a1, c1) = iadd_cout(x, y)), - def!(c_int = bint(c_in)), - def!((a, c2) = iadd_cout(a1, c_int)), - def!(c = bor(c1, c2)), - ], - ); - - expand.legalize( - def!((a, b) = isub_borrow(x, y, b_in)), - vec![ - def!((a1, b1) = isub_bout(x, y)), - def!(b_int = bint(b_in)), - def!((a, b2) = isub_bout(a1, b_int)), - def!(b = bor(b1, b2)), - ], - ); - - // Expansion for fcvt_from_sint for smaller integer types. - // This uses expand and not widen because the controlling type variable for - // this instruction is f32/f64, which is legalized as part of the expand - // group. - for &dest_ty in &[F32, F64] { - for &src_ty in &[I8, I16] { - let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); - expand.legalize( - def!(a = bound_inst(b)), - vec![ - def!(x = sextend.I32(b)), - def!(a = fcvt_from_sint.dest_ty(x)), - ], - ); - } - } - - // Expansions for immediate operands that are out of range. - for &(inst_imm, inst) in &[ - (iadd_imm, iadd), - (imul_imm, imul), - (sdiv_imm, sdiv), - (udiv_imm, udiv), - (srem_imm, srem), - (urem_imm, urem), - (band_imm, band), - (bor_imm, bor), - (bxor_imm, bxor), - (ifcmp_imm, ifcmp), - ] { - expand.legalize( - def!(a = inst_imm(x, y)), - vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], - ); - } - - expand.legalize( - def!(a = irsub_imm(y, x)), - vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], - ); - - // Rotates and shifts. - for &(inst_imm, inst) in &[ - (rotl_imm, rotl), - (rotr_imm, rotr), - (ishl_imm, ishl), - (sshr_imm, sshr), - (ushr_imm, ushr), - ] { - expand.legalize( - def!(a = inst_imm(x, y)), - vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], - ); - } - - expand.legalize( - def!(a = icmp_imm(cc, x, y)), - vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], - ); - - //# Expansions for *_not variants of bitwise ops. 
- for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { - expand.legalize( - def!(a = inst_not(x, y)), - vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], - ); - } - - //# Expand bnot using xor. - let minus_one = Literal::constant(&imm.imm64, -1); - expand.legalize( - def!(a = bnot(x)), - vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], - ); - - //# Expand bitrev - //# Adapted from Stack Overflow. - //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c - let imm64_1 = Literal::constant(&imm.imm64, 1); - let imm64_2 = Literal::constant(&imm.imm64, 2); - let imm64_4 = Literal::constant(&imm.imm64, 4); - - widen.legalize( - def!(a = bitrev.I8(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(a = bor(c2, c4)), - ], - ); - - let imm64_8 = Literal::constant(&imm.imm64, 8); - - widen.legalize( - def!(a = bitrev.I16(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), - def!(d4 = ishl_imm(d3, imm64_8)), - def!(a = bor(d2, d4)), - ], - ); - - let imm64_16 = Literal::constant(&imm.imm64, 16); - - expand.legalize( - def!(a = bitrev.I32(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))), - def!(d4 = ishl_imm(d3, imm64_8)), - def!(e = bor(d2, d4)), - def!(e1 = ushr_imm(e, 
imm64_16)), - def!(e2 = ishl_imm(e, imm64_16)), - def!(a = bor(e1, e2)), - ], - ); - - #[allow(overflowing_literals)] - let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa); - let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555); - #[allow(overflowing_literals)] - let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc); - let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333); - #[allow(overflowing_literals)] - let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0); - let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f); - #[allow(overflowing_literals)] - let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00); - let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff); - #[allow(overflowing_literals)] - let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000); - let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff); - let imm64_32 = Literal::constant(&imm.imm64, 32); - - expand.legalize( - def!(a = bitrev.I64(x)), - vec![ - def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, imm64_0x5555555555555555)), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, imm64_0x3333333333333333)), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), - def!(d4 = ishl_imm(d3, imm64_8)), - def!(e = bor(d2, d4)), - def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), - def!(e2 = ushr_imm(e1, imm64_16)), - def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), - def!(e4 = ishl_imm(e3, imm64_16)), - def!(f = bor(e2, e4)), - def!(f1 = ushr_imm(f, imm64_32)), - def!(f2 = ishl_imm(f, imm64_32)), - def!(a = bor(f1, f2)), - ], - ); - - narrow.legalize( - def!(a = bitrev.I128(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(yh = bitrev(xl)), - def!(yl = bitrev(xh)), - def!(a = iconcat(yl, yh)), - ], - ); - - // Floating-point sign manipulations. - for &(ty, const_inst, minus_zero) in &[ - (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)), - ( - F64, - f64const, - &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000), - ), - ] { - expand.legalize( - def!(a = fabs.ty(x)), - vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], - ); - - expand.legalize( - def!(a = fneg.ty(x)), - vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], - ); - - expand.legalize( - def!(a = fcopysign.ty(x, y)), - vec![ - def!(b = const_inst(minus_zero)), - def!(a1 = band_not(x, b)), - def!(a2 = band(y, b)), - def!(a = bor(a1, a2)), - ], - ); - } - - expand.custom_legalize(br_icmp, "expand_br_icmp"); - - let mut groups = TransformGroups::new(); - - let narrow_id = narrow.build_and_add_to(&mut groups); - let expand_id = expand.build_and_add_to(&mut groups); - - // Expansions using CPU flags. - let mut expand_flags = TransformGroupBuilder::new( - "expand_flags", - r#" - Instruction expansions for architectures with flags. 
- - Expand some instructions using CPU flags, then fall back to the normal - expansions. Not all architectures support CPU flags, so these patterns - are kept separate. - "#, - ) - .chain_with(expand_id); - - let imm64_0 = Literal::constant(&imm.imm64, 0); - let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - - expand_flags.legalize( - def!(trapnz(x, c)), - vec![ - def!(a = ifcmp_imm(x, imm64_0)), - def!(trapif(intcc_ne, a, c)), - ], - ); - - expand_flags.legalize( - def!(trapz(x, c)), - vec![ - def!(a = ifcmp_imm(x, imm64_0)), - def!(trapif(intcc_eq, a, c)), - ], - ); - - expand_flags.build_and_add_to(&mut groups); - - // Narrow legalizations using CPU flags. - let mut narrow_flags = TransformGroupBuilder::new( - "narrow_flags", - r#" - Narrow instructions for architectures with flags. - - Narrow some instructions using CPU flags, then fall back to the normal - legalizations. Not all architectures support CPU flags, so these - patterns are kept separate. - "#, - ) - .chain_with(narrow_id); - - narrow_flags.legalize( - def!(a = iadd(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, c) = iadd_ifcout(xl, yl)), - def!(ah = iadd_ifcin(xh, yh, c)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_flags.legalize( - def!(a = isub(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, b) = isub_ifbout(xl, yl)), - def!(ah = isub_ifbin(xh, yh, b)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_flags.build_and_add_to(&mut groups); - - // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and - // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. - // Narrow legalizations for ISAs that don't have CPU flags. - let mut narrow_no_flags = TransformGroupBuilder::new( - "narrow_no_flags", - r#" - Narrow instructions for architectures without flags. - - Narrow some instructions avoiding the use of CPU flags, then fall back - to the normal legalizations. Not all architectures support CPU flags, - so these patterns are kept separate. - "#, - ) - .chain_with(narrow_id); - - narrow_no_flags.legalize( - def!(a = iadd(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, c) = iadd_cout(xl, yl)), - def!(ah = iadd_cin(xh, yh, c)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_no_flags.legalize( - def!(a = isub(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, b) = isub_bout(xl, yl)), - def!(ah = isub_bin(xh, yh, b)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_no_flags.build_and_add_to(&mut groups); - - // TODO The order of declarations unfortunately matters to be compatible with the Python code. - // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls - // above. 
- widen.build_and_add_to(&mut groups); - - groups -} diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs index b185262ccd..53ad796c8c 100644 --- a/cranelift/codegen/meta/src/shared/mod.rs +++ b/cranelift/codegen/meta/src/shared/mod.rs @@ -4,14 +4,12 @@ pub mod entities; pub mod formats; pub mod immediates; pub mod instructions; -pub mod legalize; pub mod settings; pub mod types; use crate::cdsl::formats::{FormatStructure, InstructionFormat}; -use crate::cdsl::instructions::{AllInstructions, InstructionGroup}; +use crate::cdsl::instructions::{AllInstructions}; use crate::cdsl::settings::SettingGroup; -use crate::cdsl::xform::TransformGroups; use crate::shared::entities::EntityRefs; use crate::shared::formats::Formats; @@ -24,10 +22,8 @@ use std::rc::Rc; pub(crate) struct Definitions { pub settings: SettingGroup, pub all_instructions: AllInstructions, - pub instructions: InstructionGroup, pub imm: Immediates, pub formats: Formats, - pub transform_groups: TransformGroups, pub entities: EntityRefs, } @@ -37,17 +33,13 @@ pub(crate) fn define() -> Definitions { let immediates = Immediates::new(); let entities = EntityRefs::new(); let formats = Formats::new(&immediates, &entities); - let instructions = - instructions::define(&mut all_instructions, &formats, &immediates, &entities); - let transform_groups = legalize::define(&instructions, &immediates); + instructions::define(&mut all_instructions, &formats, &immediates, &entities); Definitions { settings: settings::define(), all_instructions, - instructions, imm: immediates, formats, - transform_groups, entities, } } diff --git a/cranelift/codegen/meta/src/srcgen.rs b/cranelift/codegen/meta/src/srcgen.rs index 0e8d4eccf0..21e3d5e904 100644 --- a/cranelift/codegen/meta/src/srcgen.rs +++ b/cranelift/codegen/meta/src/srcgen.rs @@ -77,15 +77,6 @@ impl Formatter { } } - /// Get a string containing whitespace outdented one level. Used for - /// lines of code that are inside a single indented block. - fn get_outdent(&mut self) -> String { - self.indent_pop(); - let s = self.get_indent(); - self.indent_push(); - s - } - /// Add an indented line. pub fn line(&mut self, contents: impl AsRef) { let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref()); @@ -97,12 +88,6 @@ impl Formatter { self.lines.push("\n".to_string()); } - /// Emit a line outdented one level. - pub fn outdented_line(&mut self, s: &str) { - let new_line = format!("{}{}\n", self.get_outdent(), s); - self.lines.push(new_line); - } - /// Write `self.lines` to a file. 
pub fn update_file( &self, diff --git a/cranelift/codegen/meta/src/unique_table.rs b/cranelift/codegen/meta/src/unique_table.rs index 65ef7e8b4a..50c664e4df 100644 --- a/cranelift/codegen/meta/src/unique_table.rs +++ b/cranelift/codegen/meta/src/unique_table.rs @@ -32,9 +32,6 @@ impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> { pub fn len(&self) -> usize { self.table.len() } - pub fn get(&self, index: usize) -> &T { - self.table[index] - } pub fn iter(&self) -> slice::Iter<&'entries T> { self.table.iter() } From 3e4167ba956f15f70b3075e2536c0954d36cc383 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 21 Jun 2021 12:57:37 +0200 Subject: [PATCH 06/14] Remove registers from cranelift-codegen-meta --- cranelift/codegen/meta/src/cdsl/isa.rs | 10 +- cranelift/codegen/meta/src/cdsl/mod.rs | 1 - cranelift/codegen/meta/src/cdsl/regs.rs | 332 -------------------- cranelift/codegen/meta/src/gen_registers.rs | 148 --------- cranelift/codegen/meta/src/isa/arm32/mod.rs | 41 +-- cranelift/codegen/meta/src/isa/arm64/mod.rs | 37 +-- cranelift/codegen/meta/src/isa/s390x/mod.rs | 4 +- cranelift/codegen/meta/src/isa/x86/mod.rs | 3 +- cranelift/codegen/meta/src/lib.rs | 3 - 9 files changed, 6 insertions(+), 573 deletions(-) delete mode 100644 cranelift/codegen/meta/src/cdsl/regs.rs delete mode 100644 cranelift/codegen/meta/src/gen_registers.rs diff --git a/cranelift/codegen/meta/src/cdsl/isa.rs b/cranelift/codegen/meta/src/cdsl/isa.rs index 7eb7c30517..b595ffa99f 100644 --- a/cranelift/codegen/meta/src/cdsl/isa.rs +++ b/cranelift/codegen/meta/src/cdsl/isa.rs @@ -1,18 +1,12 @@ -use crate::cdsl::regs::IsaRegs; use crate::cdsl::settings::SettingGroup; pub(crate) struct TargetIsa { pub name: &'static str, pub settings: SettingGroup, - pub regs: IsaRegs, } impl TargetIsa { - pub fn new(name: &'static str, settings: SettingGroup, regs: IsaRegs) -> Self { - Self { - name, - settings, - regs, - } + pub fn new(name: &'static str, settings: SettingGroup) -> Self { + Self { name, settings } } } diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs index a1fb8d4ea3..bf7acbbeb4 100644 --- a/cranelift/codegen/meta/src/cdsl/mod.rs +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -7,7 +7,6 @@ pub mod formats; pub mod instructions; pub mod isa; pub mod operands; -pub mod regs; pub mod settings; pub mod type_inference; pub mod types; diff --git a/cranelift/codegen/meta/src/cdsl/regs.rs b/cranelift/codegen/meta/src/cdsl/regs.rs deleted file mode 100644 index 11e1d83dd6..0000000000 --- a/cranelift/codegen/meta/src/cdsl/regs.rs +++ /dev/null @@ -1,332 +0,0 @@ -use cranelift_codegen_shared::constants; -use cranelift_entity::{entity_impl, EntityRef, PrimaryMap}; - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct RegBankIndex(u32); -entity_impl!(RegBankIndex); - -pub(crate) struct RegBank { - pub name: &'static str, - pub first_unit: u8, - pub units: u8, - pub names: Vec<&'static str>, - pub prefix: &'static str, - pub pressure_tracking: bool, - pub pinned_reg: Option, - pub toprcs: Vec, - pub classes: Vec, -} - -impl RegBank { - pub fn new( - name: &'static str, - first_unit: u8, - units: u8, - names: Vec<&'static str>, - prefix: &'static str, - pressure_tracking: bool, - pinned_reg: Option, - ) -> Self { - RegBank { - name, - first_unit, - units, - names, - prefix, - pressure_tracking, - pinned_reg, - toprcs: Vec::new(), - classes: Vec::new(), - } - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] -pub(crate) struct 
RegClassIndex(u32); -entity_impl!(RegClassIndex); - -pub(crate) struct RegClass { - pub name: &'static str, - pub index: RegClassIndex, - pub width: u8, - pub bank: RegBankIndex, - pub toprc: RegClassIndex, - pub count: u8, - pub start: u8, - pub subclasses: Vec, -} - -impl RegClass { - pub fn new( - name: &'static str, - index: RegClassIndex, - width: u8, - bank: RegBankIndex, - toprc: RegClassIndex, - count: u8, - start: u8, - ) -> Self { - Self { - name, - index, - width, - bank, - toprc, - count, - start, - subclasses: Vec::new(), - } - } - - /// Compute a bit-mask of subclasses, including self. - pub fn subclass_mask(&self) -> u64 { - let mut m = 1 << self.index.index(); - for rc in self.subclasses.iter() { - m |= 1 << rc.index(); - } - m - } - - /// Compute a bit-mask of the register units allocated by this register class. - pub fn mask(&self, bank_first_unit: u8) -> Vec { - let mut u = (self.start + bank_first_unit) as usize; - let mut out_mask = vec![0, 0, 0]; - for _ in 0..self.count { - out_mask[u / 32] |= 1 << (u % 32); - u += self.width as usize; - } - out_mask - } -} - -pub(crate) enum RegClassProto { - TopLevel(RegBankIndex), -} - -pub(crate) struct RegClassBuilder { - pub name: &'static str, - pub width: u8, - pub count: u8, - pub start: u8, - pub proto: RegClassProto, -} - -impl RegClassBuilder { - pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self { - Self { - name, - width: 1, - count: 0, - start: 0, - proto: RegClassProto::TopLevel(bank), - } - } - pub fn count(mut self, count: u8) -> Self { - self.count = count; - self - } - pub fn width(mut self, width: u8) -> Self { - match self.proto { - RegClassProto::TopLevel(_) => self.width = width, - } - self - } -} - -pub(crate) struct RegBankBuilder { - pub name: &'static str, - pub units: u8, - pub names: Vec<&'static str>, - pub prefix: &'static str, - pub pressure_tracking: Option, - pub pinned_reg: Option, -} - -impl RegBankBuilder { - pub fn new(name: &'static str, prefix: &'static str) -> Self { - Self { - name, - units: 0, - names: vec![], - prefix, - pressure_tracking: None, - pinned_reg: None, - } - } - pub fn units(mut self, units: u8) -> Self { - self.units = units; - self - } - pub fn names(mut self, names: Vec<&'static str>) -> Self { - self.names = names; - self - } - pub fn track_pressure(mut self, track: bool) -> Self { - self.pressure_tracking = Some(track); - self - } -} - -pub(crate) struct IsaRegsBuilder { - pub banks: PrimaryMap, - pub classes: PrimaryMap, -} - -impl IsaRegsBuilder { - pub fn new() -> Self { - Self { - banks: PrimaryMap::new(), - classes: PrimaryMap::new(), - } - } - - pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex { - let first_unit = if self.banks.is_empty() { - 0 - } else { - let last = &self.banks.last().unwrap(); - let first_available_unit = (last.first_unit + last.units) as i8; - let units = builder.units; - let align = if units.is_power_of_two() { - units - } else { - units.next_power_of_two() - } as i8; - (first_available_unit + align - 1) & -align - } as u8; - - self.banks.push(RegBank::new( - builder.name, - first_unit, - builder.units, - builder.names, - builder.prefix, - builder - .pressure_tracking - .expect("Pressure tracking must be explicitly set"), - builder.pinned_reg, - )) - } - - pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex { - let class_index = self.classes.next_key(); - - // Finish delayed construction of RegClass. 
- let (bank, toprc, start, width) = match builder.proto { - RegClassProto::TopLevel(bank_index) => { - self.banks - .get_mut(bank_index) - .unwrap() - .toprcs - .push(class_index); - (bank_index, class_index, builder.start, builder.width) - } - }; - - let reg_bank_units = self.banks.get(bank).unwrap().units; - assert!(start < reg_bank_units); - - let count = if builder.count != 0 { - builder.count - } else { - reg_bank_units / width - }; - - let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start); - self.classes.push(reg_class); - - let reg_bank = self.banks.get_mut(bank).unwrap(); - reg_bank.classes.push(class_index); - - class_index - } - - /// Checks that the set of register classes satisfies: - /// - /// 1. Closed under intersection: The intersection of any two register - /// classes in the set is either empty or identical to a member of the - /// set. - /// 2. There are no identical classes under different names. - /// 3. Classes are sorted topologically such that all subclasses have a - /// higher index that the superclass. - pub fn build(self) -> IsaRegs { - for reg_bank in self.banks.values() { - for i1 in reg_bank.classes.iter() { - for i2 in reg_bank.classes.iter() { - if i1 >= i2 { - continue; - } - - let rc1 = self.classes.get(*i1).unwrap(); - let rc2 = self.classes.get(*i2).unwrap(); - - let rc1_mask = rc1.mask(0); - let rc2_mask = rc2.mask(0); - - assert!( - rc1.width != rc2.width || rc1_mask != rc2_mask, - "no duplicates" - ); - if rc1.width != rc2.width { - continue; - } - - let mut intersect = Vec::new(); - for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) { - intersect.push(a & b); - } - if intersect == vec![0; intersect.len()] { - continue; - } - - // Classes must be topologically ordered, so the intersection can't be the - // superclass. - assert!(intersect != rc1_mask); - - // If the intersection is the second one, then it must be a subclass. - if intersect == rc2_mask { - assert!(self - .classes - .get(*i1) - .unwrap() - .subclasses - .iter() - .any(|x| *x == *i2)); - } - } - } - } - - assert!( - self.classes.len() <= constants::MAX_NUM_REG_CLASSES, - "Too many register classes" - ); - - let num_toplevel = self - .classes - .values() - .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking) - .count(); - - assert!( - num_toplevel <= constants::MAX_TRACKED_TOP_RCS, - "Too many top-level register classes" - ); - - IsaRegs::new(self.banks, self.classes) - } -} - -pub(crate) struct IsaRegs { - pub banks: PrimaryMap, - pub classes: PrimaryMap, -} - -impl IsaRegs { - fn new( - banks: PrimaryMap, - classes: PrimaryMap, - ) -> Self { - Self { banks, classes } - } -} diff --git a/cranelift/codegen/meta/src/gen_registers.rs b/cranelift/codegen/meta/src/gen_registers.rs deleted file mode 100644 index bd5ac95ae0..0000000000 --- a/cranelift/codegen/meta/src/gen_registers.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Generate the ISA-specific registers. 
-use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{RegBank, RegClass}; -use crate::error; -use crate::srcgen::Formatter; -use cranelift_entity::EntityRef; - -fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) { - let names = if !reg_bank.names.is_empty() { - format!(r#""{}""#, reg_bank.names.join(r#"", ""#)) - } else { - "".to_string() - }; - fmtln!(fmt, "RegBank {"); - fmt.indent(|fmt| { - fmtln!(fmt, r#"name: "{}","#, reg_bank.name); - fmtln!(fmt, "first_unit: {},", reg_bank.first_unit); - fmtln!(fmt, "units: {},", reg_bank.units); - fmtln!(fmt, "names: &[{}],", names); - fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix); - fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index()); - fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len()); - fmtln!( - fmt, - "pressure_tracking: {},", - if reg_bank.pressure_tracking { - "true" - } else { - "false" - } - ); - }); - fmtln!(fmt, "},"); -} - -fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) { - let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap(); - - let mask: Vec = reg_class - .mask(reg_bank.first_unit) - .iter() - .map(|x| format!("0x{:08x}", x)) - .collect(); - let mask = mask.join(", "); - - fmtln!( - fmt, - "pub static {}_DATA: RegClassData = RegClassData {{", - reg_class.name - ); - fmt.indent(|fmt| { - fmtln!(fmt, r#"name: "{}","#, reg_class.name); - fmtln!(fmt, "index: {},", reg_class.index.index()); - fmtln!(fmt, "width: {},", reg_class.width); - fmtln!(fmt, "bank: {},", reg_class.bank.index()); - fmtln!(fmt, "toprc: {},", reg_class.toprc.index()); - fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start); - fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask()); - fmtln!(fmt, "mask: [{}],", mask); - fmtln!( - fmt, - "pinned_reg: {:?},", - reg_bank - .pinned_reg - .map(|index| index + reg_bank.first_unit as u16 + reg_class.start as u16) - ); - fmtln!(fmt, "info: &INFO,"); - }); - fmtln!(fmt, "};"); - - fmtln!(fmt, "#[allow(dead_code)]"); - fmtln!( - fmt, - "pub static {}: RegClass = &{}_DATA;", - reg_class.name, - reg_class.name - ); -} - -fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) { - for unit in 0..reg_bank.units { - let v = unit + reg_bank.first_unit; - if (unit as usize) < reg_bank.names.len() { - fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v); - continue; - } - fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v); - } -} - -fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) { - // Emit RegInfo. - fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {"); - - fmt.indent(|fmt| { - fmtln!(fmt, "banks: &["); - // Bank descriptors. - fmt.indent(|fmt| { - for reg_bank in isa.regs.banks.values() { - gen_regbank(fmt, ®_bank); - } - }); - fmtln!(fmt, "],"); - // References to register classes. - fmtln!(fmt, "classes: &["); - fmt.indent(|fmt| { - for reg_class in isa.regs.classes.values() { - fmtln!(fmt, "&{}_DATA,", reg_class.name); - } - }); - fmtln!(fmt, "],"); - }); - fmtln!(fmt, "};"); - - // Register class descriptors. - for rc in isa.regs.classes.values() { - gen_regclass(&isa, rc, fmt); - } - - // Emit constants for all the register units. - fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]"); - fmtln!(fmt, "#[derive(Clone, Copy)]"); - fmtln!(fmt, "pub enum RU {"); - fmt.indent(|fmt| { - for reg_bank in isa.regs.banks.values() { - gen_regbank_units(reg_bank, fmt); - } - }); - fmtln!(fmt, "}"); - - // Emit Into conversion for the RU class. 
- fmtln!(fmt, "impl Into for RU {"); - fmt.indent(|fmt| { - fmtln!(fmt, "fn into(self) -> RegUnit {"); - fmt.indent(|fmt| { - fmtln!(fmt, "self as RegUnit"); - }); - fmtln!(fmt, "}"); - }); - fmtln!(fmt, "}"); -} - -pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(&isa, &mut fmt); - fmt.update_file(filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/isa/arm32/mod.rs b/cranelift/codegen/meta/src/isa/arm32/mod.rs index f524a87ea7..1c3b4d1fe0 100644 --- a/cranelift/codegen/meta/src/isa/arm32/mod.rs +++ b/cranelift/codegen/meta/src/isa/arm32/mod.rs @@ -1,5 +1,4 @@ use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; @@ -9,46 +8,8 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { setting.build() } -fn define_regs() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("FloatRegs", "s") - .units(64) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("IntRegs", "r") - .units(16) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["nzcv"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - regs.build() -} - pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); - let regs = define_regs(); - TargetIsa::new("arm32", settings, regs) + TargetIsa::new("arm32", settings) } diff --git a/cranelift/codegen/meta/src/isa/arm64/mod.rs b/cranelift/codegen/meta/src/isa/arm64/mod.rs index a8920f703d..5fd7b69309 100644 --- a/cranelift/codegen/meta/src/isa/arm64/mod.rs +++ b/cranelift/codegen/meta/src/isa/arm64/mod.rs @@ -1,5 +1,4 @@ use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; @@ -12,42 +11,8 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { setting.build() } -fn define_registers() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - // The `x31` regunit serves as the stack pointer / zero register depending on context. We - // reserve it and don't model the difference. 
- let builder = RegBankBuilder::new("IntRegs", "x") - .units(32) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FloatRegs", "v") - .units(32) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["nzcv"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - regs.build() -} - pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); - let regs = define_registers(); - TargetIsa::new("arm64", settings, regs) + TargetIsa::new("arm64", settings) } diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x/mod.rs index a4fb05a9f5..1e36e462c6 100644 --- a/cranelift/codegen/meta/src/isa/s390x/mod.rs +++ b/cranelift/codegen/meta/src/isa/s390x/mod.rs @@ -1,5 +1,4 @@ use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::IsaRegsBuilder; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; @@ -43,7 +42,6 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); - let regs = IsaRegsBuilder::new().build(); - TargetIsa::new("s390x", settings, regs) + TargetIsa::new("s390x", settings) } diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index e21bfe1485..b4c670fce7 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,5 +1,4 @@ use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::IsaRegsBuilder; use crate::shared::Definitions as SharedDefinitions; @@ -8,5 +7,5 @@ pub(crate) mod settings; pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); - TargetIsa::new("x86", settings, IsaRegsBuilder::new().build()) + TargetIsa::new("x86", settings) } diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index d8972702a3..77fcbc7bf9 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -8,7 +8,6 @@ pub mod error; pub mod isa; mod gen_inst; -mod gen_registers; mod gen_settings; mod gen_types; @@ -55,8 +54,6 @@ pub fn generate( )?; for isa in target_isas { - gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?; - gen_settings::generate( &isa.settings, gen_settings::ParentGroup::Shared, From 9e5201d88f8823514170942e17255a5d90a359df Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 21 Jun 2021 13:11:28 +0200 Subject: [PATCH 07/14] Fix all dead-code warnings in cranelift-codegen --- cranelift/codegen/src/abi.rs | 90 ----- cranelift/codegen/src/isa/enc_tables.rs | 73 +--- cranelift/codegen/src/isa/unwind.rs | 71 ---- cranelift/codegen/src/isa/unwind/systemv.rs | 203 ---------- cranelift/codegen/src/isa/unwind/winx64.rs | 71 ---- cranelift/codegen/src/legalizer/call.rs | 54 --- cranelift/codegen/src/legalizer/mod.rs | 388 +------------------- 7 files changed, 2 insertions(+), 948 deletions(-) delete mode 100644 
cranelift/codegen/src/legalizer/call.rs diff --git a/cranelift/codegen/src/abi.rs b/cranelift/codegen/src/abi.rs index 883ec546e4..29fdbf4c86 100644 --- a/cranelift/codegen/src/abi.rs +++ b/cranelift/codegen/src/abi.rs @@ -4,8 +4,6 @@ //! `TargetIsa::legalize_signature()` method. use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type}; -use alloc::borrow::Cow; -use alloc::vec::Vec; use core::cmp::Ordering; /// Legalization action to perform on a single argument or return value when converting a @@ -18,10 +16,6 @@ pub enum ArgAction { /// Assign the argument to the given location. Assign(ArgumentLoc), - /// Assign the argument to the given location and change the type to the specified type. - /// This is used by [`ArgumentPurpose::StructArgument`]. - AssignAndChangeType(ArgumentLoc, Type), - /// Convert the argument, then call again. /// /// This action can split an integer type into two smaller integer arguments, or it can split a @@ -63,34 +57,6 @@ pub enum ValueConversion { Pointer(Type), } -impl ValueConversion { - /// Apply this conversion to a type, return the converted type. - pub fn apply(self, ty: Type) -> Type { - match self { - Self::IntSplit => ty.half_width().expect("Integer type too small to split"), - Self::VectorSplit => ty.half_vector().expect("Not a vector"), - Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"), - Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty, - } - } - - /// Is this a split conversion that results in two arguments? - pub fn is_split(self) -> bool { - match self { - Self::IntSplit | Self::VectorSplit => true, - _ => false, - } - } - - /// Is this a conversion to pointer? - pub fn is_pointer(self) -> bool { - match self { - Self::Pointer(_) => true, - _ => false, - } - } -} - /// Common trait for assigning arguments to registers or stack locations. /// /// This will be implemented by individual ISAs. @@ -99,62 +65,6 @@ pub trait ArgAssigner { fn assign(&mut self, arg: &AbiParam) -> ArgAction; } -/// Legalize the arguments in `args` using the given argument assigner. -/// -/// This function can be used for both arguments and return values. -pub fn legalize_args(args: &[AbiParam], aa: &mut AA) -> Option> { - let mut args = Cow::Borrowed(args); - - // Iterate over the arguments. - // We may need to mutate the vector in place, so don't use a normal iterator, and clone the - // argument to avoid holding a reference. - let mut argno = 0; - while let Some(arg) = args.get(argno).cloned() { - // Leave the pre-assigned arguments alone. - // We'll assume that they don't interfere with our assignments. - if arg.location.is_assigned() { - argno += 1; - continue; - } - - match aa.assign(&arg) { - // Assign argument to a location and move on to the next one. - ArgAction::Assign(loc) => { - args.to_mut()[argno].location = loc; - argno += 1; - } - // Assign argument to a location, change type to the requested one and move on to the - // next one. - ArgAction::AssignAndChangeType(loc, ty) => { - let arg = &mut args.to_mut()[argno]; - arg.location = loc; - arg.value_type = ty; - argno += 1; - } - // Split this argument into two smaller ones. Then revisit both. 
- ArgAction::Convert(conv) => { - debug_assert!( - !arg.legalized_to_pointer, - "No more conversions allowed after conversion to pointer" - ); - let value_type = conv.apply(arg.value_type); - args.to_mut()[argno].value_type = value_type; - if conv.is_pointer() { - args.to_mut()[argno].legalized_to_pointer = true; - } else if conv.is_split() { - let new_arg = AbiParam { value_type, ..arg }; - args.to_mut().insert(argno + 1, new_arg); - } - } - } - } - - match args { - Cow::Borrowed(_) => None, - Cow::Owned(a) => Some(a), - } -} - /// Determine the right action to take when passing a `have` value type to a call signature where /// the next argument is `arg` which has a different value type. /// diff --git a/cranelift/codegen/src/isa/enc_tables.rs b/cranelift/codegen/src/isa/enc_tables.rs index e21557497e..b18479bee7 100644 --- a/cranelift/codegen/src/isa/enc_tables.rs +++ b/cranelift/codegen/src/isa/enc_tables.rs @@ -3,11 +3,10 @@ //! This module contains types and functions for working with the encoding tables generated by //! `cranelift-codegen/meta/src/gen_encodings.rs`. -use crate::constant_hash::{probe, Table}; +use crate::constant_hash::Table; use crate::ir::{Function, InstructionData, Opcode, Type}; use crate::isa::{Encoding, Legalize}; use crate::settings::PredicateView; -use core::ops::Range; /// A recipe predicate. /// @@ -49,14 +48,6 @@ pub struct Level1Entry + Copy> { pub offset: OffT, } -impl + Copy> Level1Entry { - /// Get the level 2 table range indicated by this entry. - fn range(&self) -> Range { - let b = self.offset.into() as usize; - b..b + (1 << self.log2len) - } -} - impl + Copy> Table for [Level1Entry] { fn len(&self) -> usize { self.len() @@ -97,68 +88,6 @@ impl + Copy> Table for [Level2Entry] { } } -/// Two-level hash table lookup and iterator construction. -/// -/// Given the controlling type variable and instruction opcode, find the corresponding encoding -/// list. -/// -/// Returns an iterator that produces legal encodings for `inst`. -pub fn lookup_enclist<'a, OffT1, OffT2>( - ctrl_typevar: Type, - inst: &'a InstructionData, - func: &'a Function, - level1_table: &'static [Level1Entry], - level2_table: &'static [Level2Entry], - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, -) -> Encodings<'a> -where - OffT1: Into + Copy, - OffT2: Into + Copy, -{ - let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) { - Err(l1idx) => { - // No level 1 entry found for the type. - // We have a sentinel entry with the default legalization code. - (!0, level1_table[l1idx].legalize) - } - Ok(l1idx) => { - // We have a valid level 1 entry for this type. - let l1ent = &level1_table[l1idx]; - let offset = match level2_table.get(l1ent.range()) { - Some(l2tab) => { - let opcode = inst.opcode(); - match probe(l2tab, opcode, opcode as usize) { - Ok(l2idx) => l2tab[l2idx].offset.into() as usize, - Err(_) => !0, - } - } - // The l1ent range is invalid. This means that we just have a customized - // legalization code for this type. The level 2 table is empty. - None => !0, - }; - (offset, l1ent.legalize) - } - }; - - // Now we have an offset into `enclist` that is `!0` when no encoding list could be found. - // The default legalization code is always valid. - Encodings::new( - offset, - legalize, - inst, - func, - enclist, - legalize_actions, - recipe_preds, - inst_preds, - isa_preds, - ) -} - /// Encoding list entry. 
/// /// Encoding lists are represented as sequences of u16 words. diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 13397c3266..4dd8ae78dd 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -24,77 +24,6 @@ pub enum UnwindInfo { SystemV(systemv::UnwindInfo), } -/// Intermediate representation for the unwind information -/// generated by a backend. -pub mod input { - use crate::binemit::CodeOffset; - use alloc::vec::Vec; - #[cfg(feature = "enable-serde")] - use serde::{Deserialize, Serialize}; - - /// Elementary operation in the unwind operations. - #[derive(Clone, Debug, PartialEq, Eq)] - #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] - pub enum UnwindCode { - /// Defines that a register is saved at the specified offset. - SaveRegister { - /// The saved register. - reg: Reg, - /// The specified offset relative to the stack pointer. - stack_offset: u32, - }, - /// Defines that a register is as defined before call. - RestoreRegister { - /// The restored register. - reg: Reg, - }, - /// The stack pointer was adjusted to allocate the stack. - StackAlloc { - /// Size to allocate. - size: u32, - }, - /// The stack pointer was adjusted to free the stack. - StackDealloc { - /// Size to deallocate. - size: u32, - }, - /// The alternative register was assigned as frame pointer base. - SetFramePointer { - /// The specified register. - reg: Reg, - }, - /// Restores a frame pointer base to default register. - RestoreFramePointer, - /// Saves the state. - RememberState, - /// Restores the state. - RestoreState, - /// On aarch64 ARMv8.3+ devices, enables or disables pointer authentication. - Aarch64SetPointerAuth { - /// Whether return addresses (hold in LR) contain a pointer-authentication code. - return_addresses: bool, - }, - } - - /// Unwind information as generated by a backend. - #[derive(Clone, Debug, PartialEq, Eq)] - #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] - pub struct UnwindInfo { - /// Size of the prologue. - pub prologue_size: CodeOffset, - /// Unwind codes for prologue. - pub prologue_unwind_codes: Vec<(CodeOffset, UnwindCode)>, - /// Unwind codes for epilogues. - pub epilogues_unwind_codes: Vec)>>, - /// Entire function size. - pub function_size: CodeOffset, - /// Platform word size in bytes. - pub word_size: u8, - /// Initial stack pointer offset. - pub initial_sp_offset: u8, - } -} - /// Unwind pseudoinstruction used in VCode backends: represents that /// at the present location, an action has just been taken. /// diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index da3bfea869..b914f13a75 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -1,7 +1,6 @@ //! System V ABI unwind information. use crate::binemit::CodeOffset; -use crate::isa::unwind::input; use crate::isa::unwind::UnwindInst; use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; @@ -259,66 +258,6 @@ pub(crate) fn create_unwind_info_from_insts>( } impl UnwindInfo { - // TODO: remove `build()` below when old backend is removed. The new backend uses a simpler - // approach in `create_unwind_info_from_insts()` above. 
-
-    pub(crate) fn build<'b, Reg: PartialEq + Copy>(
-        unwind: input::UnwindInfo<Reg>,
-        map_reg: &'b dyn RegisterMapper<Reg>,
-    ) -> CodegenResult<Self> {
-        use input::UnwindCode;
-        let mut builder = InstructionBuilder::new(unwind.initial_sp_offset, map_reg);
-
-        for (offset, c) in unwind.prologue_unwind_codes.iter().chain(
-            unwind
-                .epilogues_unwind_codes
-                .iter()
-                .map(|c| c.iter())
-                .flatten(),
-        ) {
-            match c {
-                UnwindCode::SaveRegister { reg, stack_offset } => {
-                    builder
-                        .save_reg(*offset, *reg, *stack_offset)
-                        .map_err(CodegenError::RegisterMappingError)?;
-                }
-                UnwindCode::StackAlloc { size } => {
-                    builder.adjust_sp_down_imm(*offset, *size as i64);
-                }
-                UnwindCode::StackDealloc { size } => {
-                    builder.adjust_sp_up_imm(*offset, *size as i64);
-                }
-                UnwindCode::RestoreRegister { reg } => {
-                    builder
-                        .restore_reg(*offset, *reg)
-                        .map_err(CodegenError::RegisterMappingError)?;
-                }
-                UnwindCode::SetFramePointer { reg } => {
-                    builder
-                        .set_cfa_reg(*offset, *reg)
-                        .map_err(CodegenError::RegisterMappingError)?;
-                }
-                UnwindCode::RestoreFramePointer => {
-                    builder.restore_cfa(*offset);
-                }
-                UnwindCode::RememberState => {
-                    builder.remember_state(*offset);
-                }
-                UnwindCode::RestoreState => {
-                    builder.restore_state(*offset);
-                }
-                UnwindCode::Aarch64SetPointerAuth { return_addresses } => {
-                    builder.set_aarch64_pauth(*offset, *return_addresses);
-                }
-            }
-        }
-
-        let instructions = builder.instructions;
-        let len = unwind.function_size;
-
-        Ok(Self { instructions, len })
-    }
-
     /// Converts the unwind information into a `FrameDescriptionEntry`.
     pub fn to_fde(&self, address: Address) -> gimli::write::FrameDescriptionEntry {
         let mut fde = FrameDescriptionEntry::new(address, self.len);
@@ -330,145 +269,3 @@ impl UnwindInfo {
         fde
     }
 }
-
-// TODO: delete the builder below when the old backend is removed.
-
-struct InstructionBuilder<'a, Reg: PartialEq + Copy> {
-    sp_offset: i32,
-    frame_register: Option<Reg>,
-    saved_state: Option<(i32, Option<Reg>)>,
-    map_reg: &'a dyn RegisterMapper<Reg>,
-    instructions: Vec<(u32, CallFrameInstruction)>,
-}
-
-impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> {
-    fn new(sp_offset: u8, map_reg: &'a (dyn RegisterMapper<Reg> + 'a)) -> Self {
-        Self {
-            sp_offset: sp_offset as i32, // CFA offset starts at the specified offset to account for the return address on stack
-            saved_state: None,
-            frame_register: None,
-            map_reg,
-            instructions: Vec::new(),
-        }
-    }
-
-    fn save_reg(
-        &mut self,
-        offset: u32,
-        reg: Reg,
-        stack_offset: u32,
-    ) -> Result<(), RegisterMappingError> {
-        // Pushes in the prologue are register saves, so record an offset of the save
-        self.instructions.push((
-            offset,
-            CallFrameInstruction::Offset(
-                self.map_reg.map(reg)?,
-                stack_offset as i32 - self.sp_offset,
-            ),
-        ));
-
-        Ok(())
-    }
-
-    fn adjust_sp_down_imm(&mut self, offset: u32, imm: i64) {
-        assert!(imm <= core::u32::MAX as i64);
-
-        self.sp_offset += imm as i32;
-
-        // Don't adjust the CFA if we're using a frame pointer
-        if self.frame_register.is_some() {
-            return;
-        }
-
-        self.instructions
-            .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset)));
-    }
-
-    fn adjust_sp_up_imm(&mut self, offset: u32, imm: i64) {
-        assert!(imm <= core::u32::MAX as i64);
-
-        self.sp_offset -= imm as i32;
-
-        // Don't adjust the CFA if we're using a frame pointer
-        if self.frame_register.is_some() {
-            return;
-        }
-
-        let cfa_inst_ofs = {
-            // Scan to find and merge with CFA instruction with the same offset.
-            let mut it = self.instructions.iter_mut();
-            loop {
-                match it.next_back() {
-                    Some((i_offset, i)) if *i_offset == offset => {
-                        if let CallFrameInstruction::Cfa(_, o) = i {
-                            break Some(o);
-                        }
-                    }
-                    _ => {
-                        break None;
-                    }
-                }
-            }
-        };
-
-        if let Some(o) = cfa_inst_ofs {
-            // Update previous CFA instruction.
-            *o = self.sp_offset;
-        } else {
-            // Add just CFA offset instruction.
-            self.instructions
-                .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset)));
-        }
-    }
-
-    fn set_cfa_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> {
-        self.instructions.push((
-            offset,
-            CallFrameInstruction::CfaRegister(self.map_reg.map(reg)?),
-        ));
-        self.frame_register = Some(reg);
-        Ok(())
-    }
-
-    fn restore_cfa(&mut self, offset: u32) {
-        // Restore SP and its offset.
-        self.instructions.push((
-            offset,
-            CallFrameInstruction::Cfa(self.map_reg.sp(), self.sp_offset),
-        ));
-        self.frame_register = None;
-    }
-
-    fn restore_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> {
-        // Pops in the epilogue are register restores, so record a "same value" for the register
-        self.instructions.push((
-            offset,
-            CallFrameInstruction::SameValue(self.map_reg.map(reg)?),
-        ));
-
-        Ok(())
-    }
-
-    fn remember_state(&mut self, offset: u32) {
-        self.saved_state = Some((self.sp_offset, self.frame_register));
-
-        self.instructions
-            .push((offset, CallFrameInstruction::RememberState));
-    }
-
-    fn restore_state(&mut self, offset: u32) {
-        let (sp_offset, frame_register) = self.saved_state.take().unwrap();
-        self.sp_offset = sp_offset;
-        self.frame_register = frame_register;
-
-        self.instructions
-            .push((offset, CallFrameInstruction::RestoreState));
-    }
-
-    fn set_aarch64_pauth(&mut self, offset: u32, return_addresses: bool) {
-        self.instructions.push((
-            offset,
-            CallFrameInstruction::Aarch64SetPointerAuth { return_addresses },
-        ));
-    }
-}
diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs
index 1c232f6855..68cb38b758 100644
--- a/cranelift/codegen/src/isa/unwind/winx64.rs
+++ b/cranelift/codegen/src/isa/unwind/winx64.rs
@@ -1,6 +1,5 @@
 //! Windows x64 ABI unwind information.
 
-use crate::isa::unwind::input;
 use crate::result::{CodegenError, CodegenResult};
 use alloc::vec::Vec;
 use log::warn;
@@ -259,76 +258,6 @@ impl UnwindInfo {
             .iter()
             .fold(0, |nodes, c| nodes + c.node_count())
     }
-
-    // TODO: remove `build()` below when old backend is removed. The new backend uses
-    // a simpler approach in `create_unwind_info_from_insts()` below.
-
-    pub(crate) fn build<Reg: PartialEq + Copy, MR: RegisterMapper<Reg>>(
-        unwind: input::UnwindInfo<Reg>,
-    ) -> CodegenResult<Self> {
-        use crate::isa::unwind::input::UnwindCode as InputUnwindCode;
-
-        let word_size: u32 = unwind.word_size.into();
-        let mut unwind_codes = Vec::new();
-        for (offset, c) in unwind.prologue_unwind_codes.iter() {
-            match c {
-                InputUnwindCode::SaveRegister { reg, stack_offset } => {
-                    let reg = MR::map(*reg);
-                    let offset = ensure_unwind_offset(*offset)?;
-                    match reg {
-                        MappedRegister::Int(reg) => {
-                            // Attempt to convert sequence of the `InputUnwindCode`:
-                            // `StackAlloc { size = word_size }`, `SaveRegister { stack_offset: 0 }`
-                            // to the shorter `UnwindCode::PushRegister`.
- let push_reg_sequence = if let Some(UnwindCode::StackAlloc { - instruction_offset: alloc_offset, - size, - }) = unwind_codes.last() - { - *size == word_size && offset == *alloc_offset && *stack_offset == 0 - } else { - false - }; - if push_reg_sequence { - *unwind_codes.last_mut().unwrap() = UnwindCode::PushRegister { - instruction_offset: offset, - reg, - }; - } else { - unwind_codes.push(UnwindCode::SaveReg { - instruction_offset: offset, - reg, - stack_offset: *stack_offset, - }); - } - } - MappedRegister::Xmm(reg) => { - unwind_codes.push(UnwindCode::SaveXmm { - instruction_offset: offset, - reg, - stack_offset: *stack_offset, - }); - } - } - } - InputUnwindCode::StackAlloc { size } => { - unwind_codes.push(UnwindCode::StackAlloc { - instruction_offset: ensure_unwind_offset(*offset)?, - size: *size, - }); - } - _ => {} - } - } - - Ok(Self { - flags: 0, // this assumes cranelift functions have no SEH handlers - prologue_size: ensure_unwind_offset(unwind.prologue_size)?, - frame_register: None, - frame_register_offset: 0, - unwind_codes, - }) - } } const UNWIND_RBP_REG: u8 = 5; diff --git a/cranelift/codegen/src/legalizer/call.rs b/cranelift/codegen/src/legalizer/call.rs deleted file mode 100644 index 4321dbb90b..0000000000 --- a/cranelift/codegen/src/legalizer/call.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Legalization of calls. -//! -//! This module exports the `expand_call` function which transforms a `call` -//! instruction into `func_addr` and `call_indirect` instructions. - -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{self, InstBuilder}; -use crate::isa::TargetIsa; - -/// Expand a `call` instruction. This lowers it to a `call_indirect`, which -/// is only done if the ABI doesn't support direct calls. -pub fn expand_call( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - // Unpack the instruction. 
- let (func_ref, old_args) = match func.dfg[inst] { - ir::InstructionData::Call { - opcode, - ref args, - func_ref, - } => { - debug_assert_eq!(opcode, ir::Opcode::Call); - (func_ref, args.clone()) - } - _ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)), - }; - - let ptr_ty = isa.pointer_type(); - - let sig = func.dfg.ext_funcs[func_ref].signature; - - let callee = { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.ins().func_addr(ptr_ty, func_ref) - }; - - let mut new_args = ir::ValueList::default(); - new_args.push(callee, &mut func.dfg.value_lists); - for i in 0..old_args.len(&func.dfg.value_lists) { - new_args.push( - old_args.as_slice(&func.dfg.value_lists)[i], - &mut func.dfg.value_lists, - ); - } - - func.dfg - .replace(inst) - .CallIndirect(ir::Opcode::CallIndirect, ptr_ty, sig, new_args); -} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 0ba70915a1..a5a248c0e7 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -15,7 +15,7 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; -use crate::ir::types::{I32, I64}; +use crate::ir::types::I32; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; @@ -23,7 +23,6 @@ use crate::timing; use alloc::collections::BTreeSet; mod boundary; -mod call; mod globalvalue; mod heap; mod libcall; @@ -320,12 +319,6 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: } } -// Include legalization patterns that were generated by `gen_legalizer.rs` from the -// `TransformGroup` in `cranelift-codegen/meta/shared/legalize.rs`. -// -// Concretely, this defines private functions `narrow()`, and `expand()`. -include!(concat!(env!("OUT_DIR"), "/legalizer.rs")); - /// Custom expansion for conditional trap instructions. /// TODO: Add CFG support to the Rust DSL patterns so we won't have to do this. fn expand_cond_trap( @@ -403,189 +396,6 @@ fn expand_cond_trap( cfg.recompute_block(pos.func, new_block_trap); } -/// Jump tables. -fn expand_br_table( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - if isa.flags().enable_jump_tables() { - expand_br_table_jt(inst, func, cfg, isa); - } else { - expand_br_table_conds(inst, func, cfg, isa); - } -} - -/// Expand br_table to jump table. -fn expand_br_table_jt( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::IntCC; - - let (arg, default_block, table) = match func.dfg[inst] { - ir::InstructionData::BranchTable { - opcode: ir::Opcode::BrTable, - arg, - destination, - table, - } => (arg, destination, table), - _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), - }; - - // Rewrite: - // - // br_table $idx, default_block, $jt - // - // To: - // - // $oob = ifcmp_imm $idx, len($jt) - // brif uge $oob, default_block - // jump fallthrough_block - // - // fallthrough_block: - // $base = jump_table_base.i64 $jt - // $rel_addr = jump_table_entry.i64 $idx, $base, 4, $jt - // $addr = iadd $base, $rel_addr - // indirect_jump_table_br $addr, $jt - - let block = func.layout.pp_block(inst); - let jump_table_block = func.dfg.make_block(); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Bounds check. 
- let table_size = pos.func.jump_tables[table].len() as i64; - let oob = pos - .ins() - .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size); - - pos.ins().brnz(oob, default_block, &[]); - pos.ins().jump(jump_table_block, &[]); - pos.insert_block(jump_table_block); - - let addr_ty = isa.pointer_type(); - - let arg = if pos.func.dfg.value_type(arg) == addr_ty { - arg - } else { - pos.ins().uextend(addr_ty, arg) - }; - - let base_addr = pos.ins().jump_table_base(addr_ty, table); - let entry = pos - .ins() - .jump_table_entry(arg, base_addr, I32.bytes() as u8, table); - - let addr = pos.ins().iadd(base_addr, entry); - pos.ins().indirect_jump_table_br(addr, table); - - pos.remove_inst(); - cfg.recompute_block(pos.func, block); - cfg.recompute_block(pos.func, jump_table_block); -} - -/// Expand br_table to series of conditionals. -fn expand_br_table_conds( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::IntCC; - - let (arg, default_block, table) = match func.dfg[inst] { - ir::InstructionData::BranchTable { - opcode: ir::Opcode::BrTable, - arg, - destination, - table, - } => (arg, destination, table), - _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), - }; - - let block = func.layout.pp_block(inst); - - // This is a poor man's jump table using just a sequence of conditional branches. - let table_size = func.jump_tables[table].len(); - let mut cond_failed_block = vec![]; - if table_size >= 1 { - cond_failed_block = alloc::vec::Vec::with_capacity(table_size - 1); - for _ in 0..table_size - 1 { - cond_failed_block.push(func.dfg.make_block()); - } - } - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Ignore the lint for this loop as the range needs to be 0 to table_size - #[allow(clippy::needless_range_loop)] - for i in 0..table_size { - let dest = pos.func.jump_tables[table].as_slice()[i]; - let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64); - pos.ins().brnz(t, dest, &[]); - // Jump to the next case. - if i < table_size - 1 { - let block = cond_failed_block[i]; - pos.ins().jump(block, &[]); - pos.insert_block(block); - } - } - - // `br_table` jumps to the default destination if nothing matches - pos.ins().jump(default_block, &[]); - - pos.remove_inst(); - cfg.recompute_block(pos.func, block); - for failed_block in cond_failed_block.into_iter() { - cfg.recompute_block(pos.func, failed_block); - } -} - -/// Expand the select instruction. -/// -/// Conditional moves are available in some ISAs for some register classes. The remaining selects -/// are handled by a branch. 
-fn expand_select( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let (ctrl, tval, fval) = match func.dfg[inst] { - ir::InstructionData::Ternary { - opcode: ir::Opcode::Select, - args, - } => (args[0], args[1], args[2]), - _ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)), - }; - - // Replace `result = select ctrl, tval, fval` with: - // - // brnz ctrl, new_block(tval) - // jump new_block(fval) - // new_block(result): - let old_block = func.layout.pp_block(inst); - let result = func.dfg.first_result(inst); - func.dfg.clear_results(inst); - let new_block = func.dfg.make_block(); - func.dfg.attach_block_param(new_block, result); - - func.dfg.replace(inst).brnz(ctrl, new_block, &[tval]); - let mut pos = FuncCursor::new(func).after_inst(inst); - pos.use_srcloc(inst); - pos.ins().jump(new_block, &[fval]); - pos.insert_block(new_block); - - cfg.recompute_block(pos.func, new_block); - cfg.recompute_block(pos.func, old_block); -} - fn expand_br_icmp( inst: ir::Inst, func: &mut ir::Function, @@ -620,34 +430,6 @@ fn expand_br_icmp( cfg.recompute_block(pos.func, old_block); } -/// Expand illegal `f32const` and `f64const` instructions. -fn expand_fconst( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let ty = func.dfg.value_type(func.dfg.first_result(inst)); - debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty); - - // In the future, we may want to generate constant pool entries for these constants, but for - // now use an `iconst` and a bit cast. - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - let ival = match pos.func.dfg[inst] { - ir::InstructionData::UnaryIeee32 { - opcode: ir::Opcode::F32const, - imm, - } => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())), - ir::InstructionData::UnaryIeee64 { - opcode: ir::Opcode::F64const, - imm, - } => pos.ins().iconst(ir::types::I64, imm.bits() as i64), - _ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)), - }; - pos.func.dfg.replace(inst).bitcast(ty, ival); -} - /// Expand illegal `stack_load` instructions. fn expand_stack_load( inst: ir::Inst, @@ -713,171 +495,3 @@ fn expand_stack_store( mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, val, addr, 0); } - -/// Split a load into two parts before `iconcat`ing the result together. -fn narrow_load( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let (ptr, offset, flags) = match pos.func.dfg[inst] { - ir::InstructionData::Load { - opcode: ir::Opcode::Load, - arg, - offset, - flags, - } => (arg, offset, flags), - _ => panic!("Expected load: {}", pos.func.dfg.display_inst(inst, None)), - }; - - let res_ty = pos.func.dfg.ctrl_typevar(inst); - let small_ty = res_ty.half_width().expect("Can't narrow load"); - - let al = pos.ins().load(small_ty, flags, ptr, offset); - let ah = pos.ins().load( - small_ty, - flags, - ptr, - offset.try_add_i64(8).expect("load offset overflow"), - ); - let (al, ah) = match flags.endianness(isa.endianness()) { - ir::Endianness::Little => (al, ah), - ir::Endianness::Big => (ah, al), - }; - pos.func.dfg.replace(inst).iconcat(al, ah); -} - -/// Split a store into two parts after `isplit`ing the value. 
-fn narrow_store( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let (val, ptr, offset, flags) = match pos.func.dfg[inst] { - ir::InstructionData::Store { - opcode: ir::Opcode::Store, - args, - offset, - flags, - } => (args[0], args[1], offset, flags), - _ => panic!("Expected store: {}", pos.func.dfg.display_inst(inst, None)), - }; - - let (al, ah) = pos.ins().isplit(val); - let (al, ah) = match flags.endianness(isa.endianness()) { - ir::Endianness::Little => (al, ah), - ir::Endianness::Big => (ah, al), - }; - pos.ins().store(flags, al, ptr, offset); - pos.ins().store( - flags, - ah, - ptr, - offset.try_add_i64(8).expect("store offset overflow"), - ); - pos.remove_inst(); -} - -/// Expands an illegal iconst value by splitting it into two. -fn narrow_iconst( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let imm: i64 = if let ir::InstructionData::UnaryImm { - opcode: ir::Opcode::Iconst, - imm, - } = &func.dfg[inst] - { - (*imm).into() - } else { - panic!("unexpected instruction in narrow_iconst"); - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let ty = pos.func.dfg.ctrl_typevar(inst); - if isa.pointer_bits() == 32 && ty == I64 { - let low = pos.ins().iconst(I32, imm & 0xffffffff); - let high = pos.ins().iconst(I32, imm >> 32); - // The instruction has as many results as iconcat, so no need to replace them. - pos.func.dfg.replace(inst).iconcat(low, high); - return; - } - - unimplemented!("missing encoding or legalization for iconst.{:?}", ty); -} - -fn narrow_icmp_imm( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::{CondCode, IntCC}; - - let (arg, cond, imm): (ir::Value, IntCC, i64) = match func.dfg[inst] { - ir::InstructionData::IntCompareImm { - opcode: ir::Opcode::IcmpImm, - arg, - cond, - imm, - } => (arg, cond, imm.into()), - _ => panic!("unexpected instruction in narrow_icmp_imm"), - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let ty = pos.func.dfg.ctrl_typevar(inst); - let ty_half = ty.half_width().unwrap(); - - let mask = ((1u128 << ty_half.bits()) - 1) as i64; - let imm_low = pos.ins().iconst(ty_half, imm & mask); - let imm_high = pos.ins().iconst( - ty_half, - imm.checked_shr(ty_half.bits().into()).unwrap_or(0) & mask, - ); - let (arg_low, arg_high) = pos.ins().isplit(arg); - - match cond { - IntCC::Equal => { - let res_low = pos.ins().icmp(cond, arg_low, imm_low); - let res_high = pos.ins().icmp(cond, arg_high, imm_high); - pos.func.dfg.replace(inst).band(res_low, res_high); - } - IntCC::NotEqual => { - let res_low = pos.ins().icmp(cond, arg_low, imm_low); - let res_high = pos.ins().icmp(cond, arg_high, imm_high); - pos.func.dfg.replace(inst).bor(res_low, res_high); - } - IntCC::SignedGreaterThan - | IntCC::SignedGreaterThanOrEqual - | IntCC::SignedLessThan - | IntCC::SignedLessThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedGreaterThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::UnsignedLessThanOrEqual => { - let b1 = pos.ins().icmp(cond.without_equal(), arg_high, imm_high); - let b2 = pos - .ins() - .icmp(cond.inverse().without_equal(), arg_high, imm_high); - let b3 = pos.ins().icmp(cond.unsigned(), arg_low, imm_low); - let c1 = pos.ins().bnot(b2); - let c2 = pos.ins().band(c1, b3); - 
pos.func.dfg.replace(inst).bor(b1, c2);
-        }
-        _ => unimplemented!("missing legalization for condition {:?}", cond),
-    }
-}

From 53ec12d5195471036715c05c7c8036cb8a55e47f Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Mon, 21 Jun 2021 13:24:46 +0200
Subject: [PATCH 08/14] Rustfmt

---
 cranelift/codegen/meta/src/cdsl/instructions.rs | 12 +++---------
 cranelift/codegen/meta/src/cdsl/typevar.rs      |  1 -
 cranelift/codegen/meta/src/shared/mod.rs        |  2 +-
 cranelift/codegen/src/machinst/adapter.rs       |  4 +---
 cranelift/filetests/src/function_runner.rs      |  4 ++--
 5 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index 50b3a82cc1..ba9aeebad6 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -25,9 +25,7 @@ pub(crate) struct InstructionGroupBuilder<'all_inst> {
 
 impl<'all_inst> InstructionGroupBuilder<'all_inst> {
     pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self {
-        Self {
-            all_instructions,
-        }
+        Self { all_instructions }
     }
 
     pub fn push(&mut self, builder: InstructionBuilder) {
@@ -449,13 +447,9 @@ impl Bindable for BoundInstruction {
     fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
         let mut modified = self.clone();
         match parameter.into() {
-            BindParameter::Lane(lane_type) => modified
-                .value_types
-                .push(lane_type.into()),
+            BindParameter::Lane(lane_type) => modified.value_types.push(lane_type.into()),
             BindParameter::Reference(reference_type) => {
-                modified
-                    .value_types
-                    .push(reference_type.into());
+                modified.value_types.push(reference_type.into());
             }
         }
         modified.verify_bindings().unwrap();
diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs
index af1ba966a4..eea3e2724c 100644
--- a/cranelift/codegen/meta/src/cdsl/typevar.rs
+++ b/cranelift/codegen/meta/src/cdsl/typevar.rs
@@ -895,7 +895,6 @@ fn test_forward_images() {
     );
 }
 
-
 #[test]
 #[should_panic]
 fn test_typeset_singleton_panic_nonsingleton_types() {
diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs
index 53ad796c8c..521e058ec9 100644
--- a/cranelift/codegen/meta/src/shared/mod.rs
+++ b/cranelift/codegen/meta/src/shared/mod.rs
@@ -8,7 +8,7 @@ pub mod settings;
 pub mod types;
 
 use crate::cdsl::formats::{FormatStructure, InstructionFormat};
-use crate::cdsl::instructions::{AllInstructions};
+use crate::cdsl::instructions::AllInstructions;
 use crate::cdsl::settings::SettingGroup;
 
 use crate::shared::entities::EntityRefs;
diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs
index b60bf8300a..543084a0b5 100644
--- a/cranelift/codegen/src/machinst/adapter.rs
+++ b/cranelift/codegen/src/machinst/adapter.rs
@@ -2,9 +2,7 @@
 
 use crate::binemit;
 use crate::ir;
-use crate::isa::{
-    EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
-};
+use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
 use crate::machinst::*;
 use crate::regalloc::RegisterSet;
 use crate::settings::{self, Flags};
diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs
index 58a321adba..45170d90ee 100644
--- a/cranelift/filetests/src/function_runner.rs
+++ b/cranelift/filetests/src/function_runner.rs
@@ -48,8 +48,8 @@ impl SingleFunctionCompiler {
     /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags.
pub fn with_host_isa(flags: settings::Flags) -> Self { - let builder = builder_with_options(true) - .expect("Unable to build a TargetIsa for the current host"); + let builder = + builder_with_options(true).expect("Unable to build a TargetIsa for the current host"); let isa = builder.finish(flags); Self::new(isa) } From a646f685538c1d138d8e553133325d8af72300a9 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 29 Sep 2021 17:37:23 +0200 Subject: [PATCH 09/14] Remove legacy x86_64 backend tests --- .../filetests/filetests/isa/x64/bextend.clif | 2 +- .../filetests/filetests/runtests/alias.clif | 2 +- .../filetests/runtests/arithmetic.clif | 2 +- .../filetests/runtests/atomic-rmw-2.clif | 2 +- .../filetests/runtests/atomic-rmw.clif | 2 +- .../filetests/filetests/runtests/bint.clif | 2 +- .../filetests/filetests/runtests/bitops.clif | 3 +- .../filetests/filetests/runtests/br.clif | 3 +- .../filetests/filetests/runtests/br_icmp.clif | 2 +- .../filetests/runtests/br_icmp_overflow.clif | 2 +- .../filetests/runtests/br_table.clif | 2 +- .../filetests/filetests/runtests/const.clif | 3 +- .../filetests/runtests/div-checks.clif | 2 +- .../filetests/filetests/runtests/extend.clif | 3 +- .../runtests/fmin-max-pseudo-vector.clif | 2 +- .../filetests/runtests/fmin-max-pseudo.clif | 2 +- .../filetests/filetests/runtests/heap.clif | 2 +- .../runtests/i128-arithmetic-legacy.clif | 20 -- .../filetests/runtests/i128-arithmetic.clif | 2 +- .../filetests/runtests/i128-bint.clif | 2 +- .../filetests/runtests/i128-bitops-count.clif | 2 +- .../filetests/runtests/i128-bitops.clif | 2 +- .../filetests/runtests/i128-bitrev.clif | 3 +- .../filetests/filetests/runtests/i128-br.clif | 3 +- .../filetests/runtests/i128-concat-split.clif | 2 +- .../filetests/runtests/i128-const.clif | 2 +- .../filetests/runtests/i128-extend-2.clif | 64 ----- .../filetests/runtests/i128-extend.clif | 62 ++++- .../filetests/runtests/i128-icmp.clif | 2 +- .../filetests/runtests/i128-load-store.clif | 2 +- .../filetests/runtests/i128-reduce.clif | 2 +- .../filetests/runtests/i128-rotate.clif | 2 +- .../filetests/runtests/i128-select.clif | 2 +- .../filetests/runtests/i128-shifts.clif | 2 +- .../filetests/filetests/runtests/icmp-eq.clif | 2 +- .../filetests/filetests/runtests/icmp-ne.clif | 2 +- .../filetests/runtests/icmp-nof.clif | 2 +- .../filetests/filetests/runtests/icmp-of.clif | 2 +- .../filetests/runtests/icmp-sge.clif | 2 +- .../filetests/runtests/icmp-sgt.clif | 2 +- .../filetests/runtests/icmp-sle.clif | 2 +- .../filetests/runtests/icmp-slt.clif | 2 +- .../filetests/runtests/icmp-uge.clif | 2 +- .../filetests/runtests/icmp-ugt.clif | 2 +- .../filetests/runtests/icmp-ule.clif | 2 +- .../filetests/runtests/icmp-ult.clif | 2 +- .../filetests/filetests/runtests/icmp.clif | 2 +- .../filetests/filetests/runtests/shifts.clif | 2 +- ...md-arithmetic-nondeterministic-x86_64.clif | 2 +- .../filetests/runtests/simd-arithmetic.clif | 4 +- .../runtests/simd-bitselect-to-vselect.clif | 7 +- .../filetests/runtests/simd-bitwise-run.clif | 2 +- .../filetests/runtests/simd-bitwise.clif | 2 +- .../runtests/simd-comparison-legacy.clif | 44 ---- .../filetests/runtests/simd-comparison.clif | 4 +- .../filetests/runtests/simd-conversion.clif | 4 +- .../filetests/runtests/simd-iabs.clif | 2 +- .../runtests/simd-lane-access-legacy.clif | 221 ------------------ .../filetests/runtests/simd-lane-access.clif | 2 +- .../filetests/runtests/simd-logical.clif | 2 +- .../runtests/simd-sqmulroundsat.clif | 2 +- .../filetests/runtests/simd-swidenhigh.clif | 2 +- 
.../filetests/runtests/simd-swidenlow.clif | 2 +- .../filetests/runtests/simd-swizzle.clif | 2 +- .../filetests/runtests/simd-uwidenhigh.clif | 2 +- .../filetests/runtests/simd-uwidenlow.clif | 2 +- .../filetests/runtests/simd-valltrue.clif | 2 +- .../filetests/runtests/simd-vanytrue.clif | 2 +- .../simd-vconst-optimized-legacy.clif | 46 ---- .../filetests/runtests/simd-vconst.clif | 6 +- .../filetests/runtests/simd-vhighbits.clif | 2 +- .../filetests/runtests/simd-vselect.clif | 2 +- .../filetests/filetests/runtests/smulhi.clif | 2 +- .../filetests/runtests/spill-reload.clif | 3 +- .../filetests/runtests/stack-addr-64.clif | 2 +- .../filetests/filetests/runtests/stack.clif | 2 +- .../filetests/filetests/runtests/umulhi.clif | 2 +- 77 files changed, 131 insertions(+), 490 deletions(-) delete mode 100644 cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif delete mode 100644 cranelift/filetests/filetests/runtests/i128-extend-2.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif delete mode 100644 cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif index 6b53f3c3bd..8678e7f66a 100644 --- a/cranelift/filetests/filetests/isa/x64/bextend.clif +++ b/cranelift/filetests/filetests/isa/x64/bextend.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(b8) -> b64 { block0(v0: b8): diff --git a/cranelift/filetests/filetests/runtests/alias.clif b/cranelift/filetests/filetests/runtests/alias.clif index cf5e99ca95..61ee5af491 100644 --- a/cranelift/filetests/filetests/runtests/alias.clif +++ b/cranelift/filetests/filetests/runtests/alias.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %alias(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/arithmetic.clif b/cranelift/filetests/filetests/runtests/arithmetic.clif index c3cfe07c4c..28936f45e4 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %add_i64(i64, i64) -> i64 { block0(v0: i64,v1: i64): diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif index f48f4a953e..b697a9279e 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif @@ -1,7 +1,7 @@ test run target aarch64 target aarch64 has_lse -target x86_64 machinst +target x86_64 ; TODO: Merge this with atomic-rmw.clif when s390x supports it diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw.clif b/cranelift/filetests/filetests/runtests/atomic-rmw.clif index f96f645c33..57e18a0dbe 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw.clif @@ -1,7 +1,7 @@ test run target aarch64 target aarch64 has_lse -target x86_64 machinst +target x86_64 target s390x ; We can't test that these instructions are right regarding atomicity, but we can diff --git a/cranelift/filetests/filetests/runtests/bint.clif b/cranelift/filetests/filetests/runtests/bint.clif index 30bb91be11..cce35d1c7f 100644 
--- a/cranelift/filetests/filetests/runtests/bint.clif +++ b/cranelift/filetests/filetests/runtests/bint.clif @@ -1,6 +1,6 @@ test run target aarch64 -target x86_64 machinst +target x86_64 function %bint_b8_i16_true() -> i16 { block0: diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index f84e276f47..1524e04bb5 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -2,8 +2,7 @@ test run target aarch64 target arm target s390x -; target x86_64 machinst TODO: Not yet implemented on x86_64 -target x86_64 legacy +; target x86_64 TODO: Not yet implemented on x86_64 function %bnot_band() -> b1 { diff --git a/cranelift/filetests/filetests/runtests/br.clif b/cranelift/filetests/filetests/runtests/br.clif index 2d56d9ae95..b6c1f1d282 100644 --- a/cranelift/filetests/filetests/runtests/br.clif +++ b/cranelift/filetests/filetests/runtests/br.clif @@ -3,8 +3,7 @@ test run target aarch64 target arm target s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 function %jump() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/br_icmp.clif b/cranelift/filetests/filetests/runtests/br_icmp.clif index cfb448a33b..0806ff1adb 100644 --- a/cranelift/filetests/filetests/runtests/br_icmp.clif +++ b/cranelift/filetests/filetests/runtests/br_icmp.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %bricmp_eq_i64(i64, i64) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif b/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif index 71c3a43169..d05b83251d 100644 --- a/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif +++ b/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 ; TODO: Merge this with the main br_icmp file when s390x supports overflows. 
; See: https://github.com/bytecodealliance/wasmtime/issues/3060 diff --git a/cranelift/filetests/filetests/runtests/br_table.clif b/cranelift/filetests/filetests/runtests/br_table.clif index 16718d4e76..e58dda3cfe 100644 --- a/cranelift/filetests/filetests/runtests/br_table.clif +++ b/cranelift/filetests/filetests/runtests/br_table.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 target s390x diff --git a/cranelift/filetests/filetests/runtests/const.clif b/cranelift/filetests/filetests/runtests/const.clif index b3067c6d8a..ec91e171bb 100644 --- a/cranelift/filetests/filetests/runtests/const.clif +++ b/cranelift/filetests/filetests/runtests/const.clif @@ -2,8 +2,7 @@ test run target aarch64 target arm target s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 function %i8_iconst_0() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/div-checks.clif b/cranelift/filetests/filetests/runtests/div-checks.clif index 7e362598d1..beb1a077ff 100644 --- a/cranelift/filetests/filetests/runtests/div-checks.clif +++ b/cranelift/filetests/filetests/runtests/div-checks.clif @@ -3,7 +3,7 @@ target aarch64 target arm target s390x set avoid_div_traps=false -target x86_64 machinst +target x86_64 function %i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/extend.clif b/cranelift/filetests/filetests/runtests/extend.clif index 69be211032..dc89220aa4 100644 --- a/cranelift/filetests/filetests/runtests/extend.clif +++ b/cranelift/filetests/filetests/runtests/extend.clif @@ -2,8 +2,7 @@ test run target aarch64 target arm target s390x -; target x86_64 machinst TODO: Not yet implemented on x86_64 -target i686 legacy +; target x86_64 TODO: Not yet implemented on x86_64 function %uextend() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif index 5bd7b07ada..9bbba57559 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif @@ -2,7 +2,7 @@ test run ; target s390x TODO: Not yet implemented on s390x set enable_simd target aarch64 -target x86_64 machinst skylake +target x86_64 skylake function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { block0(v0:f32x4, v1:f32x4): diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif index 7fd70504f1..628dcc15a9 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif @@ -2,7 +2,7 @@ test run ; target s390x TODO: Not yet implemented on s390x target aarch64 set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %fmin_pseudo_f32(f32, f32) -> f32 { block0(v0:f32, v1:f32): diff --git a/cranelift/filetests/filetests/runtests/heap.clif b/cranelift/filetests/filetests/runtests/heap.clif index b203705131..35d80a5151 100644 --- a/cranelift/filetests/filetests/runtests/heap.clif +++ b/cranelift/filetests/filetests/runtests/heap.clif @@ -1,5 +1,5 @@ test run -target x86_64 machinst +target x86_64 target s390x target aarch64 diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif deleted file mode 100644 index d5590b2564..0000000000 --- 
a/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif +++ /dev/null @@ -1,20 +0,0 @@ -test run -target x86_64 legacy haswell - -function %test_imul_i128() -> b1 { -block0: - v11 = iconst.i64 0xf2347ac4503f1e24 - v12 = iconst.i64 0x0098fe985354ab06 - v1 = iconcat v11, v12 - v21 = iconst.i64 0xf606ba453589ef89 - v22 = iconst.i64 0x042e1f3054ca7432 - v2 = iconcat v21, v22 - v31 = iconst.i64 0xbe2044b2742ebd44 - v32 = iconst.i64 0xa363ce3b6849f307 - v3 = iconcat v31, v32 - v4 = imul v1, v2 - v5 = icmp eq v3, v4 - return v5 -} - -; run diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif index f891239155..da51097ec4 100644 --- a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif @@ -2,7 +2,7 @@ test interpret test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %add_i128(i128, i128) -> i128 { block0(v0: i128,v1: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bint.clif b/cranelift/filetests/filetests/runtests/i128-bint.clif index f959af583b..83c9152d51 100644 --- a/cranelift/filetests/filetests/runtests/i128-bint.clif +++ b/cranelift/filetests/filetests/runtests/i128-bint.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %bint_b8_i128() -> i128 { block0: diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif index 627dbf3e91..60d74e874b 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %ctz_i128(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bitops.clif b/cranelift/filetests/filetests/runtests/i128-bitops.clif index 4a86f43fe5..72db60a970 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %bnot_i128(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index 5e30ad90fd..c685c45ee0 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -1,8 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %reverse_bits_zero() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index d755a5bd06..0ed0e9caf2 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -1,8 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %i128_brz(i128) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/i128-concat-split.clif b/cranelift/filetests/filetests/runtests/i128-concat-split.clif index 56abdddca5..1559148c81 100644 --- 
a/cranelift/filetests/filetests/runtests/i128-concat-split.clif +++ b/cranelift/filetests/filetests/runtests/i128-concat-split.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %iconcat_isplit(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/runtests/i128-const.clif b/cranelift/filetests/filetests/runtests/i128-const.clif index 65975c680c..379a928b45 100644 --- a/cranelift/filetests/filetests/runtests/i128-const.clif +++ b/cranelift/filetests/filetests/runtests/i128-const.clif @@ -3,7 +3,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 ; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst +target x86_64 function %i128_const_0() -> i128 { block0: diff --git a/cranelift/filetests/filetests/runtests/i128-extend-2.clif b/cranelift/filetests/filetests/runtests/i128-extend-2.clif deleted file mode 100644 index 0d5ce42a58..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-extend-2.clif +++ /dev/null @@ -1,64 +0,0 @@ -test run -set enable_llvm_abi_extensions=true -target aarch64 -target x86_64 machinst -; TODO: Merge this file with i128-extend once the x86 legacy backend is removed - -function %i128_uextend_i32(i32) -> i128 { -block0(v0: i32): - v1 = uextend.i128 v0 - return v1 -} -; run: %i128_uextend_i32(0) == 0 -; run: %i128_uextend_i32(-1) == 0x00000000_00000000_00000000_ffffffff -; run: %i128_uextend_i32(0xffff_eeee) == 0x00000000_00000000_00000000_ffffeeee - -function %i128_sextend_i32(i32) -> i128 { -block0(v0: i32): - v1 = sextend.i128 v0 - return v1 -} -; run: %i128_sextend_i32(0) == 0 -; run: %i128_sextend_i32(-1) == -1 -; run: %i128_sextend_i32(0x7fff_ffff) == 0x00000000_00000000_00000000_7fffffff -; run: %i128_sextend_i32(0xffff_eeee) == 0xffffffff_ffffffff_ffffffff_ffff_eeee - - -function %i128_uextend_i16(i16) -> i128 { -block0(v0: i16): - v1 = uextend.i128 v0 - return v1 -} -; run: %i128_uextend_i16(0) == 0 -; run: %i128_uextend_i16(-1) == 0x00000000_00000000_00000000_0000ffff -; run: %i128_uextend_i16(0xffee) == 0x00000000_00000000_00000000_0000ffee - -function %i128_sextend_i16(i16) -> i128 { -block0(v0: i16): - v1 = sextend.i128 v0 - return v1 -} -; run: %i128_sextend_i16(0) == 0 -; run: %i128_sextend_i16(-1) == -1 -; run: %i128_sextend_i16(0x7fff) == 0x00000000_00000000_00000000_00007fff -; run: %i128_sextend_i16(0xffee) == 0xffffffff_ffffffff_ffffffff_ffffffee - - -function %i128_uextend_i8(i8) -> i128 { -block0(v0: i8): - v1 = uextend.i128 v0 - return v1 -} -; run: %i128_uextend_i8(0) == 0 -; run: %i128_uextend_i8(-1) == 0x00000000_00000000_00000000_000000ff -; run: %i128_uextend_i8(0xfe) == 0x00000000_00000000_00000000_000000fe - -function %i128_sextend_i8(i8) -> i128 { -block0(v0: i8): - v1 = sextend.i128 v0 - return v1 -} -; run: %i128_sextend_i8(0) == 0 -; run: %i128_sextend_i8(-1) == -1 -; run: %i128_sextend_i8(0x7f) == 0x00000000_00000000_00000000_0000007f -; run: %i128_sextend_i8(0xfe) == 0xffffffff_ffffffff_ffffffff_fffffffe diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 1789299479..a0d1b67276 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -1,8 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): @@ -22,3 +21,62 @@ 
block0(v0: i64): ; run: %i128_sextend_i64(-1) == -1 ; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == 0x00000000_00000000_7fffffffffffffff ; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == 0xffffffff_ffffffff_ffffffff_eeee0000 + +function %i128_uextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i32(0) == 0 +; run: %i128_uextend_i32(-1) == 0x00000000_00000000_00000000_ffffffff +; run: %i128_uextend_i32(0xffff_eeee) == 0x00000000_00000000_00000000_ffffeeee + +function %i128_sextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i32(0) == 0 +; run: %i128_sextend_i32(-1) == -1 +; run: %i128_sextend_i32(0x7fff_ffff) == 0x00000000_00000000_00000000_7fffffff +; run: %i128_sextend_i32(0xffff_eeee) == 0xffffffff_ffffffff_ffffffff_ffff_eeee + + +function %i128_uextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i16(0) == 0 +; run: %i128_uextend_i16(-1) == 0x00000000_00000000_00000000_0000ffff +; run: %i128_uextend_i16(0xffee) == 0x00000000_00000000_00000000_0000ffee + +function %i128_sextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i16(0) == 0 +; run: %i128_sextend_i16(-1) == -1 +; run: %i128_sextend_i16(0x7fff) == 0x00000000_00000000_00000000_00007fff +; run: %i128_sextend_i16(0xffee) == 0xffffffff_ffffffff_ffffffff_ffffffee + + +function %i128_uextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i8(0) == 0 +; run: %i128_uextend_i8(-1) == 0x00000000_00000000_00000000_000000ff +; run: %i128_uextend_i8(0xfe) == 0x00000000_00000000_00000000_000000fe + +function %i128_sextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i8(0) == 0 +; run: %i128_sextend_i8(-1) == -1 +; run: %i128_sextend_i8(0x7f) == 0x00000000_00000000_00000000_0000007f +; run: %i128_sextend_i8(0xfe) == 0xffffffff_ffffffff_ffffffff_fffffffe diff --git a/cranelift/filetests/filetests/runtests/i128-icmp.clif b/cranelift/filetests/filetests/runtests/i128-icmp.clif index 5758d5ef35..6fb3f1808b 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp.clif @@ -2,7 +2,7 @@ test interpret test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %icmp_eq_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-load-store.clif b/cranelift/filetests/filetests/runtests/i128-load-store.clif index 4d4c9e11dd..dc389bc049 100644 --- a/cranelift/filetests/filetests/runtests/i128-load-store.clif +++ b/cranelift/filetests/filetests/runtests/i128-load-store.clif @@ -1,6 +1,6 @@ test run set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 target aarch64 function %i128_stack_store_load(i128) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/i128-reduce.clif b/cranelift/filetests/filetests/runtests/i128-reduce.clif index 15baef87f1..0554aafeda 100644 --- a/cranelift/filetests/filetests/runtests/i128-reduce.clif +++ b/cranelift/filetests/filetests/runtests/i128-reduce.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %ireduce_128_64(i128) -> i64 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif 
b/cranelift/filetests/filetests/runtests/i128-rotate.clif index 47a4ab9ea5..8e0b23d0a9 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %rotl(i128, i8) -> i128 { block0(v0: i128, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/i128-select.clif b/cranelift/filetests/filetests/runtests/i128-select.clif index d557d31e2f..bf643a9e2c 100644 --- a/cranelift/filetests/filetests/runtests/i128-select.clif +++ b/cranelift/filetests/filetests/runtests/i128-select.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %i128_select(b1, i128, i128) -> i128 { block0(v0: b1, v1: i128, v2: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 41cef85e16..7cd37995a5 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -1,7 +1,7 @@ test run set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 function %ishl_i128_i128(i128, i8) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/icmp-eq.clif b/cranelift/filetests/filetests/runtests/icmp-eq.clif index 4708e6da3e..ccd9c2ffe8 100644 --- a/cranelift/filetests/filetests/runtests/icmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/icmp-eq.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_eq_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ne.clif b/cranelift/filetests/filetests/runtests/icmp-ne.clif index 79dd304ed4..6e841b876b 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ne.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ne_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-nof.clif b/cranelift/filetests/filetests/runtests/icmp-nof.clif index 1814fa1547..6817b01651 100644 --- a/cranelift/filetests/filetests/runtests/icmp-nof.clif +++ b/cranelift/filetests/filetests/runtests/icmp-nof.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 function %icmp_nof_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-of.clif b/cranelift/filetests/filetests/runtests/icmp-of.clif index d45917ffd1..26565d4ce2 100644 --- a/cranelift/filetests/filetests/runtests/icmp-of.clif +++ b/cranelift/filetests/filetests/runtests/icmp-of.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 function %icmp_of_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-sge.clif b/cranelift/filetests/filetests/runtests/icmp-sge.clif index ccdcf80215..98981981e7 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sge.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sge_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-sgt.clif b/cranelift/filetests/filetests/runtests/icmp-sgt.clif index 
9395ab6af1..c5f036b39f 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sgt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sgt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sgt_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-sle.clif b/cranelift/filetests/filetests/runtests/icmp-sle.clif index 040f6a3b29..586c3a0c1b 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sle.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sle.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sle_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-slt.clif b/cranelift/filetests/filetests/runtests/icmp-slt.clif index 826676b460..89b37904b0 100644 --- a/cranelift/filetests/filetests/runtests/icmp-slt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-slt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_slt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-uge.clif b/cranelift/filetests/filetests/runtests/icmp-uge.clif index bdc73e6e8e..8e6bda917f 100644 --- a/cranelift/filetests/filetests/runtests/icmp-uge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-uge.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_uge_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ugt.clif b/cranelift/filetests/filetests/runtests/icmp-ugt.clif index 98d5634157..5e99ec4ff2 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ugt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ugt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ule.clif b/cranelift/filetests/filetests/runtests/icmp-ule.clif index d7d0a72a3a..38ce80a309 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ule.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ule_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ult.clif b/cranelift/filetests/filetests/runtests/icmp-ult.clif index cd1e794deb..c0821afb87 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ult.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ult.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ult_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp.clif b/cranelift/filetests/filetests/runtests/icmp.clif index 5f13fdffec..e33d3728e5 100644 --- a/cranelift/filetests/filetests/runtests/icmp.clif +++ b/cranelift/filetests/filetests/runtests/icmp.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 ; This test is also a regression test for aarch64. 
; We were not correctly handling the fact that the rhs constant value diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index a7b27ca3ed..c39ff3a0d4 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -1,6 +1,6 @@ test run target aarch64 -target x86_64 machinst +target x86_64 target s390x diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif index 323579b6ce..caa49d9534 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif @@ -3,7 +3,7 @@ ; simd-arithmetic-nondeterministic*.clif as well. test run set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %fmax_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif index 5397f5d874..1ca8e8fcfa 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst skylake -set enable_simd -target x86_64 legacy skylake +target x86_64 skylake function %iadd_i32x4(i32x4, i32x4) -> i32x4 { block0(v0:i32x4, v1:i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif index ae23e1552c..a2086b0426 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif @@ -3,12 +3,7 @@ target aarch64 ; target s390x TODO: Not yet implemented on s390x set opt_level=speed_and_size set enable_simd -target x86_64 machinst skylake -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy haswell - -;; x86_64 legacy: Test if bitselect->vselect optimization works properly +target x86_64 skylake function %mask_from_icmp(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif b/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif index 4f66e51ec4..af7b24d5e6 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif @@ -1,7 +1,7 @@ test run set enable_simd target aarch64 -target x86_64 legacy skylake +target x86_64 skylake ; TODO: once available, replace all lane extraction with `icmp + all_ones` diff --git a/cranelift/filetests/filetests/runtests/simd-bitwise.clif b/cranelift/filetests/filetests/runtests/simd-bitwise.clif index 44474b5b80..670844db22 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitwise.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitwise.clif @@ -2,7 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bitselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16, v2: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif 
b/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif deleted file mode 100644 index 4ed7ae8224..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif +++ /dev/null @@ -1,44 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %maxs_i8x16() -> b1 { -block0: - v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with - ; signed max - v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] - v2 = x86_pmaxs v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %maxu_i16x8() -> b1 { -block0: - v0 = vconst.i16x8 [0 1 1 1 1 1 1 1] - v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xff will be greater with unsigned max - v2 = x86_pmaxu v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %mins_i32x4() -> b1 { -block0: - v0 = vconst.i32x4 [0 1 1 1] - v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xff will be less with signed min - v2 = x86_pmins v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %minu_i8x16() -> b1 { -block0: - v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsiged min - v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] - v2 = x86_pminu v0, v1 - v8 = vall_true v2 - return v8 -} -; run diff --git a/cranelift/filetests/filetests/runtests/simd-comparison.clif b/cranelift/filetests/filetests/runtests/simd-comparison.clif index 33402b1175..c704e5a3b5 100644 --- a/cranelift/filetests/filetests/runtests/simd-comparison.clif +++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy +target x86_64 function %icmp_eq_i8x16() -> b8 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif index 4f984b8c48..b950a9a2cd 100644 --- a/cranelift/filetests/filetests/runtests/simd-conversion.clif +++ b/cranelift/filetests/filetests/runtests/simd-conversion.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy +target x86_64 function %fcvt_from_sint(i32x4) -> f32x4 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif index 022815c4a4..ee1db6762c 100644 --- a/cranelift/filetests/filetests/runtests/simd-iabs.clif +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %iabs_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif b/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif deleted file mode 100644 index 3318635035..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif +++ /dev/null @@ -1,221 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %shuffle_different_ssa_values() -> b1 { -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] - v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 - v3 = extractlane.i8x16 v2, 15 - v4 = iconst.i8 42 - v5 = icmp eq v3, v4 - return v5 -} -; run - -function %shuffle_same_ssa_value() -> b1 { -block0: - v0 = 
vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax - v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes - v2 = extractlane.i8x16 v1, 4 - v3 = iconst.i8 0x01 - v4 = icmp eq v2, v3 - return v4 -} -; run - -function %compare_shuffle() -> b1 { -block0: - v1 = vconst.i32x4 [0 1 2 3] - v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; keep each lane in place from the first vector - v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v4 = raw_bitcast.i32x4 v3 - v5 = extractlane.i32x4 v4, 3 - v6 = icmp_imm eq v5, 3 - v7 = extractlane.i32x4 v4, 0 - v8 = icmp_imm eq v7, 0 - v9 = band v6, v8 - return v9 -} -; run - -function %compare_shuffle() -> b32 { -block0: - v1 = vconst.b32x4 [true false true false] - v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; pair up the true values to make the entire vector true - v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] - v4 = raw_bitcast.b32x4 v3 - v5 = extractlane v4, 3 - v6 = extractlane v4, 0 - v7 = band v5, v6 - return v7 -} -; run - -; TODO once SIMD vector comparison is implemented, remove use of extractlane below - -function %insertlane_b8() -> b8 { -block0: - v1 = bconst.b8 true - v2 = vconst.b8x16 [false false false false false false false false false false false false false - false false false] - v3 = insertlane v2, v1, 10 - v4 = extractlane v3, 10 - return v4 -} -; run - -function %insertlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 0x00 - v2 = insertlane v1, v0, 1 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %insertlane_f64_lane1() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, v0, 1 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %insertlane_f64_lane0() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, v0, 0 - v3 = extractlane v2, 0 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %extractlane_b8() -> b8 { -block0: - v1 = vconst.b8x16 [false false false false false false false false false false true false false - false false false] - v2 = extractlane v1, 10 - return v2 -} -; run - -function %extractlane_i16() -> b1 { -block0: - v0 = vconst.i16x8 0x00080007000600050004000300020001 - v1 = extractlane v0, 1 - v2 = icmp_imm eq v1, 2 - return v2 -} -; run - -function %extractlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] - v2 = extractlane v1, 3 - v3 = fcmp eq v2, v0 - return v3 -} -; run - -function %extractlane_i32_with_vector_reuse() -> b1 { -block0: - v0 = iconst.i32 42 - v1 = iconst.i32 99 - - v2 = splat.i32x4 v0 - v3 = insertlane v2, v1, 2 - - v4 = extractlane v3, 3 - v5 = icmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = icmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run - -function %extractlane_f32_with_vector_reuse() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = f32const 0x99.99 - - v2 = splat.f32x4 v0 - v3 = insertlane v2, v1, 2 - - v4 = extractlane v3, 3 - v5 = fcmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = fcmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run - -function %swizzle() -> b1 { -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42] - v2 = swizzle.i8x16 v0, v1 ; reverse the lanes, with over-large 
index 42 using lane 0 - - v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %swizzle_with_overflow() -> b1 { -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - v2 = swizzle.i8x16 v0, v1 ; 250 should overflow but saturate so that the MSB is set (PSHUFB uses this to shuffle from lane 0) - - v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %unpack_low() -> b1 { -block0: - v0 = vconst.i32x4 [0 1 2 3] - v1 = vconst.i32x4 [4 5 6 7] - v2 = x86_punpckl v0, v1 - - v3 = vconst.i32x4 [0 4 1 5] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %snarrow(i32x4, i32x4) -> i16x8 { -block0(v0: i32x4, v1: i32x4): - v2 = snarrow v0, v1 - return v2 -} -; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff] - -function %unarrow(i32x4, i32x4) -> i16x8 { -block0(v0: i32x4, v1: i32x4): - v2 = unarrow v0, v1 - return v2 -} -; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access.clif b/cranelift/filetests/filetests/runtests/simd-lane-access.clif index 4ab67d9177..7510cd8865 100644 --- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif +++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif @@ -2,7 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst +target x86_64 ;; shuffle diff --git a/cranelift/filetests/filetests/runtests/simd-logical.clif b/cranelift/filetests/filetests/runtests/simd-logical.clif index 6480dd98f7..9cdf8fdb69 100644 --- a/cranelift/filetests/filetests/runtests/simd-logical.clif +++ b/cranelift/filetests/filetests/runtests/simd-logical.clif @@ -2,7 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst +target x86_64 function %bnot() -> b32 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif index a6ada04f22..1faa3592ad 100644 --- a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { block0(v0: i16x8, v1: i16x8): diff --git a/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif index 6242859e8d..47d4229c61 100644 --- a/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif +++ b/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swidenhigh_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-swidenlow.clif b/cranelift/filetests/filetests/runtests/simd-swidenlow.clif index 38f100ef29..997734702b 100644 --- a/cranelift/filetests/filetests/runtests/simd-swidenlow.clif +++ b/cranelift/filetests/filetests/runtests/simd-swidenlow.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swidenlow_i8x16(i8x16) 
-> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-swizzle.clif b/cranelift/filetests/filetests/runtests/simd-swizzle.clif index 6cad36f033..6375e0b0e8 100644 --- a/cranelift/filetests/filetests/runtests/simd-swizzle.clif +++ b/cranelift/filetests/filetests/runtests/simd-swizzle.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swizzle_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif index 281e63ac02..d9e4b2b0e0 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %uwidenhigh_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif index 8b865a305b..6acf4e1a2b 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %uwidenlow_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-valltrue.clif b/cranelift/filetests/filetests/runtests/simd-valltrue.clif index ced9a6bb0d..c39a2702e6 100644 --- a/cranelift/filetests/filetests/runtests/simd-valltrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %vall_true_b8x16(b8x16) -> b1 { block0(v0: b8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif index cf407ea606..74b99d785e 100644 --- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %vany_true_b8x16(b8x16) -> b1 { block0(v0: b8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif b/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif deleted file mode 100644 index 8d28c6b0de..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif +++ /dev/null @@ -1,46 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %vconst_syntax() -> b1 { -block0: - v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax - v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax - - ; verify lane 1 matches - v2 = extractlane v0, 1 - v3 = extractlane v1, 1 - v4 = icmp eq v3, v2 - - ; verify lane 1 has the correct value - v5 = icmp_imm eq v3, 2 - - v6 = band v4, v5 - return v6 -} -; run - -; Since both jump tables and constants are emitted after the function body, it is important that any RIP-relative -; addressing of constants is not incorrect in the presence of jump tables. This test confirms that, even when both -; jump tables and constants are emitted, the constant addressing works correctly. 
-function %vconst_with_jumptables() -> b1 { -jt0 = jump_table [block0] - -block10: - v10 = iconst.i64 0 - br_table v10, block1, jt0 -block0: - v0 = iconst.i64 100 - jump block11(v0) -block1: - v1 = iconst.i64 101 - jump block11(v1) -block11(v11: i64): - v12 = icmp_imm eq v11, 100 ; We should have jumped through block 0. - v13 = vconst.i32x4 [1 2 3 4] - v14 = extractlane.i32x4 v13, 1 ; Extract the second element... - v15 = icmp_imm eq v14, 2 ; ...which should be the value 2. - v16 = band v12, v15 - return v16 -} -; run diff --git a/cranelift/filetests/filetests/runtests/simd-vconst.clif b/cranelift/filetests/filetests/runtests/simd-vconst.clif index 49b89a0330..f1a98e7ea1 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst.clif @@ -2,11 +2,7 @@ test run ; target s390x TODO: Not yet implemented on s390x target aarch64 set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy -set enable_simd -target x86_64 legacy skylake +target x86_64 function %vconst_zeroes() -> b1 { diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif index a5d7146f11..2a9c5d1a75 100644 --- a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %vhighbits_i8x16(i8x16) -> i16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 5c910df60d..84a2479ca5 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -3,7 +3,7 @@ test run ; target s390x TODO: Not yet implemented on s390x target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %vselect_i8x16() -> i8x16 { block0: diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index b2fe1072ed..ec855002a4 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): diff --git a/cranelift/filetests/filetests/runtests/spill-reload.clif b/cranelift/filetests/filetests/runtests/spill-reload.clif index 63f8c50576..f2c6bd0fe6 100644 --- a/cranelift/filetests/filetests/runtests/spill-reload.clif +++ b/cranelift/filetests/filetests/runtests/spill-reload.clif @@ -1,8 +1,7 @@ test run target s390x target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 { block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32): diff --git a/cranelift/filetests/filetests/runtests/stack-addr-64.clif b/cranelift/filetests/filetests/runtests/stack-addr-64.clif index 0e59e7c410..5dd4527024 100644 --- a/cranelift/filetests/filetests/runtests/stack-addr-64.clif +++ b/cranelift/filetests/filetests/runtests/stack-addr-64.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst 
+target x86_64
 target s390x
 target aarch64
 
diff --git a/cranelift/filetests/filetests/runtests/stack.clif b/cranelift/filetests/filetests/runtests/stack.clif
index 0a7323c4d9..efcc25f02e 100644
--- a/cranelift/filetests/filetests/runtests/stack.clif
+++ b/cranelift/filetests/filetests/runtests/stack.clif
@@ -1,6 +1,6 @@
 test interpret
 test run
-target x86_64 machinst
+target x86_64
 target s390x
 target aarch64
 
diff --git a/cranelift/filetests/filetests/runtests/umulhi.clif b/cranelift/filetests/filetests/runtests/umulhi.clif
index bb6b06bedd..fd643151ab 100644
--- a/cranelift/filetests/filetests/runtests/umulhi.clif
+++ b/cranelift/filetests/filetests/runtests/umulhi.clif
@@ -2,7 +2,7 @@ test interpret
 test run
 target aarch64
 set enable_simd
-target x86_64 machinst
+target x86_64
 
 function %umulhi_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):

From 71907184a5091b1b1ccfbf057a341349b2897eea Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 29 Sep 2021 18:21:47 +0200
Subject: [PATCH 10/14] Rustfmt

---
 tests/all/debug/translate.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/all/debug/translate.rs b/tests/all/debug/translate.rs
index aa1b79343b..db5e2af935 100644
--- a/tests/all/debug/translate.rs
+++ b/tests/all/debug/translate.rs
@@ -108,4 +108,3 @@ check: DW_AT_decl_line (10)
 "##,
     )
 }
-

From a2040542ce1e331d43e3f58bd57322906f411625 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 29 Sep 2021 18:24:24 +0200
Subject: [PATCH 11/14] Remove unused fields

---
 cranelift/codegen/meta/src/shared/mod.rs | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs
index 521e058ec9..549ae7cf99 100644
--- a/cranelift/codegen/meta/src/shared/mod.rs
+++ b/cranelift/codegen/meta/src/shared/mod.rs
@@ -22,9 +22,6 @@ use std::rc::Rc;
 pub(crate) struct Definitions {
     pub settings: SettingGroup,
     pub all_instructions: AllInstructions,
-    pub imm: Immediates,
-    pub formats: Formats,
-    pub entities: EntityRefs,
 }
 
 pub(crate) fn define() -> Definitions {
@@ -38,9 +35,6 @@ pub(crate) fn define() -> Definitions {
     Definitions {
         settings: settings::define(),
         all_instructions,
-        imm: immediates,
-        formats,
-        entities,
     }
 }

From 3fae9e5fa9c62a6d28f40eb79b879c6e876c6106 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 29 Sep 2021 18:43:04 +0200
Subject: [PATCH 12/14] Remove outdated tests from cranelift-codegen-meta

---
 cranelift/codegen/meta/src/cdsl/instructions.rs | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index ba9aeebad6..72979f9350 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -719,13 +719,6 @@ mod test {
         inst.bind(LaneType::Int(I32));
     }
 
-    #[test]
-    fn ensure_bound_instructions_can_bind_immediates() {
-        let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
-        let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal));
-        assert!(bound_inst.verify_bindings().is_ok());
-    }
-
     #[test]
     #[should_panic]
     fn ensure_instructions_fail_to_bind() {
@@ -742,14 +735,4 @@ mod test {
         let inst = build_fake_instruction(vec![in1], vec![]);
         inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64));
     }
-
-    #[test]
-    #[should_panic]
-    fn ensure_instructions_fail_to_bind_too_many_immediates() {
-        let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
-        inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal)))
-            .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal)));
-        // Trying to bind too many immediates to an instruction should fail; note that the immediate
-        // values are nonsensical but irrelevant to the purpose of this test.
-    }
 }

From 4b6d20d03ff5e3980df1dd0d00be9eba56affc26 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 29 Sep 2021 19:45:49 +0200
Subject: [PATCH 13/14] Fix extend test for AArch64

---
 .../filetests/filetests/runtests/extend.clif | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/cranelift/filetests/filetests/runtests/extend.clif b/cranelift/filetests/filetests/runtests/extend.clif
index dc89220aa4..524177de10 100644
--- a/cranelift/filetests/filetests/runtests/extend.clif
+++ b/cranelift/filetests/filetests/runtests/extend.clif
@@ -2,17 +2,14 @@ test run
 target aarch64
 target arm
 target s390x
-; target x86_64 TODO: Not yet implemented on x86_64
+target x86_64
 
 function %uextend() -> b1 {
 block0:
     v0 = iconst.i32 0xffff_ee00
     v1 = uextend.i64 v0
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ee00
-    v5 = icmp_imm eq v3, 0
-    v6 = band v4, v5
-    return v6
+    v2 = icmp_imm eq v1, 0xffff_ee00
+    return v2
 }
 ; run
 
@@ -20,10 +17,7 @@ function %sextend() -> b1 {
 block0:
     v0 = iconst.i32 0xffff_ee00
     v1 = sextend.i64 v0
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ee00
-    v5 = icmp_imm eq v3, 0xffff_ffff
-    v6 = band v4, v5
-    return v6
+    v2 = icmp_imm eq v1, 0xffff_ffff_ffff_ee00
+    return v2
 }
 ; run

From 463a88e002251d4ba032f925a4be9420edb877a3 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 29 Sep 2021 22:37:45 +0200
Subject: [PATCH 14/14] Rename lookup_variant to lookup

---
 cranelift/codegen/src/isa/mod.rs | 12 +++---------
 cranelift/native/src/lib.rs      |  2 +-
 cranelift/reader/src/parser.rs   |  2 +-
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index 069324f040..8de000409f 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -111,9 +111,9 @@ macro_rules! isa_builder {
     }};
 }
 
-/// Look for an ISA for the given `triple`, selecting the backend variant given
-/// by `variant` if available.
-pub fn lookup_variant(triple: Triple) -> Result<Builder, LookupError> {
+/// Look for an ISA for the given `triple`.
+/// Return a builder that can create a corresponding `TargetIsa`.
+pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
     match triple.architecture {
         Architecture::X86_64 => {
             isa_builder!(x64, (feature = "x86"), triple)
@@ -125,12 +125,6 @@
     }
 }
 
-/// Look for an ISA for the given `triple`.
-/// Return a builder that can create a corresponding `TargetIsa`.
-pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
-    lookup_variant(triple)
-}
-
 /// Look for a supported ISA with the given `name`.
 /// Return a builder that can create a corresponding `TargetIsa`.
 pub fn lookup_by_name(name: &str) -> Result<Builder, LookupError> {
diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs
index c2a5aa78b8..c80898a24b 100644
--- a/cranelift/native/src/lib.rs
+++ b/cranelift/native/src/lib.rs
@@ -41,7 +41,7 @@ pub fn builder() -> Result<isa::Builder, &'static str> {
 /// useful when more than oen backend exists for a given target
 /// (e.g., on x86-64).
 pub fn builder_with_options(infer_native_flags: bool) -> Result<isa::Builder, &'static str> {
-    let mut isa_builder = isa::lookup_variant(Triple::host()).map_err(|err| match err {
+    let mut isa_builder = isa::lookup(Triple::host()).map_err(|err| match err {
         isa::LookupError::SupportDisabled => "support for architecture disabled at compile time",
         isa::LookupError::Unsupported => "unsupported architecture",
     })?;
diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs
index 98c46ab2e3..1daa593501 100644
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -1159,7 +1159,7 @@ impl<'a> Parser<'a> {
                 Ok(triple) => triple,
                 Err(err) => return err!(loc, err),
             };
-            let mut isa_builder = match isa::lookup_variant(triple) {
+            let mut isa_builder = match isa::lookup(triple) {
                 Err(isa::LookupError::SupportDisabled) => {
                     continue;
                 }