From 07e1f682d08379628d72f8606692a79dfc1e9a0a Mon Sep 17 00:00:00 2001 From: Denis Merigoux Date: Mon, 31 Jul 2017 14:52:39 -0700 Subject: [PATCH] Added Intel x86-64 encodings for 64bit loads and store instructions (#127) * Added Intel x86-64 encodings for 64bit loads and store instructions * Using GPR registers instead of ABCD for istore8 with REX prefix Fixed testing of 64bit intel encoding * Emit REX and REX-less encodings for optional REX prefix Value renumbering in binary64.cton --- cranelift/filetests/isa/intel/binary64.cton | 376 ++++++++++++++++---- lib/cretonne/meta/base/instructions.py | 4 +- lib/cretonne/meta/isa/intel/encodings.py | 86 +++-- lib/cretonne/src/isa/intel/binemit.rs | 9 + 4 files changed, 373 insertions(+), 102 deletions(-) diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 3d93ba86ec..bc19e039d3 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -145,52 +145,199 @@ ebb0: ; asm: movq %rcx, %r10 [-,%r10] v112 = copy v1 ; bin: 49 89 ca + ; Load/Store instructions. + + ; Register indirect addressing with no displacement. + + ; asm: movq %rcx, (%rsi) + store v1, v2 ; bin: 48 89 0e + ; asm: movq %rsi, (%rcx) + store v2, v1 ; bin: 48 89 31 + ; asm: movl %ecx, (%rsi) + istore32 v1, v2 ; bin: 40 89 0e + ; asm: movl %esi, (%rcx) + istore32 v2, v1 ; bin: 40 89 31 + ; asm: movw %cx, (%rsi) + istore16 v1, v2 ; bin: 66 40 89 0e + ; asm: movw %si, (%rcx) + istore16 v2, v1 ; bin: 66 40 89 31 + ; asm: movb %cl, (%rsi) + istore8 v1, v2 ; bin: 40 88 0e + ; asm: movb %sil, (%rcx) + istore8 v2, v1 ; bin: 40 88 31 + + ; asm: movq (%rcx), %rdi + [-,%rdi] v120 = load.i64 v1 ; bin: 48 8b 39 + ; asm: movq (%rsi), %rdx + [-,%rdx] v121 = load.i64 v2 ; bin: 48 8b 16 + ; asm: movl (%rcx), %edi + [-,%rdi] v122 = uload32.i64 v1 ; bin: 40 8b 39 + ; asm: movl (%rsi), %edx + [-,%rdx] v123 = uload32.i64 v2 ; bin: 40 8b 16 + ; asm: movslq (%rcx), %rdi + [-,%rdi] v124 = sload32.i64 v1 ; bin: 48 63 39 + ; asm: movslq (%rsi), %rdx + [-,%rdx] v125 = sload32.i64 v2 ; bin: 48 63 16 + ; asm: movzwq (%rcx), %rdi + [-,%rdi] v126 = uload16.i64 v1 ; bin: 48 0f b7 39 + ; asm: movzwq (%rsi), %rdx + [-,%rdx] v127 = uload16.i64 v2 ; bin: 48 0f b7 16 + ; asm: movswq (%rcx), %rdi + [-,%rdi] v128 = sload16.i64 v1 ; bin: 48 0f bf 39 + ; asm: movswq (%rsi), %rdx + [-,%rdx] v129 = sload16.i64 v2 ; bin: 48 0f bf 16 + ; asm: movzbq (%rcx), %rdi + [-,%rdi] v130 = uload8.i64 v1 ; bin: 48 0f b6 39 + ; asm: movzbq (%rsi), %rdx + [-,%rdx] v131 = uload8.i64 v2 ; bin: 48 0f b6 16 + ; asm: movsbq (%rcx), %rdi + [-,%rdi] v132 = sload8.i64 v1 ; bin: 48 0f be 39 + ; asm: movsbq (%rsi), %rdx + [-,%rdx] v133 = sload8.i64 v2 ; bin: 48 0f be 16 + + ; Register-indirect with 8-bit signed displacement. 
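+    ; For reference, the disp8 forms below set mod=01 in the ModR/M byte and append
+    ; one signed displacement byte: e.g. "movq %rcx, 100(%rsi)" encodes as 48 (REX.W)
+    ; 89 (mov r/m64, r64) 4e (mod=01, reg=rcx, rm=rsi) 64 (disp8 = +100).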
+ + ; asm: movq %rcx, 100(%rsi) + store v1, v2+100 ; bin: 48 89 4e 64 + ; asm: movq %rsi, -100(%rcx) + store v2, v1-100 ; bin: 48 89 71 9c + ; asm: movl %ecx, 100(%rsi) + istore32 v1, v2+100 ; bin: 40 89 4e 64 + ; asm: movl %esi, -100(%rcx) + istore32 v2, v1-100 ; bin: 40 89 71 9c + ; asm: movw %cx, 100(%rsi) + istore16 v1, v2+100 ; bin: 66 40 89 4e 64 + ; asm: movw %si, -100(%rcx) + istore16 v2, v1-100 ; bin: 66 40 89 71 9c + ; asm: movb %cl, 100(%rsi) + istore8 v1, v2+100 ; bin: 40 88 4e 64 + ; asm: movb %sil, 100(%rcx) + istore8 v2, v1+100 ; bin: 40 88 71 64 + + ; asm: movq 50(%rcx), %rdi + [-,%rdi] v140 = load.i64 v1+50 ; bin: 48 8b 79 32 + ; asm: movq -50(%rsi), %rdx + [-,%rdx] v141 = load.i64 v2-50 ; bin: 48 8b 56 ce + ; asm: movl 50(%rcx), %edi + [-,%rdi] v142 = uload32.i64 v1+50 ; bin: 40 8b 79 32 + ; asm: movl -50(%rsi), %edx + [-,%rdx] v143 = uload32.i64 v2-50 ; bin: 40 8b 56 ce + ; asm: movslq 50(%rcx), %rdi + [-,%rdi] v144 = sload32.i64 v1+50 ; bin: 48 63 79 32 + ; asm: movslq -50(%rsi), %rdx + [-,%rdx] v145 = sload32.i64 v2-50 ; bin: 48 63 56 ce + ; asm: movzwq 50(%rcx), %rdi + [-,%rdi] v146 = uload16.i64 v1+50 ; bin: 48 0f b7 79 32 + ; asm: movzwq -50(%rsi), %rdx + [-,%rdx] v147 = uload16.i64 v2-50 ; bin: 48 0f b7 56 ce + ; asm: movswq 50(%rcx), %rdi + [-,%rdi] v148 = sload16.i64 v1+50 ; bin: 48 0f bf 79 32 + ; asm: movswq -50(%rsi), %rdx + [-,%rdx] v149 = sload16.i64 v2-50 ; bin: 48 0f bf 56 ce + ; asm: movzbq 50(%rcx), %rdi + [-,%rdi] v150 = uload8.i64 v1+50 ; bin: 48 0f b6 79 32 + ; asm: movzbq -50(%rsi), %rdx + [-,%rdx] v151 = uload8.i64 v2-50 ; bin: 48 0f b6 56 ce + ; asm: movsbq 50(%rcx), %rdi + [-,%rdi] v152 = sload8.i64 v1+50 ; bin: 48 0f be 79 32 + ; asm: movsbq -50(%rsi), %rdx + [-,%rdx] v153 = sload8.i64 v2-50 ; bin: 48 0f be 56 ce + + ; Register-indirect with 32-bit signed displacement. 
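+    ; For reference, the disp32 forms below set mod=10 in the ModR/M byte and append
+    ; a signed 32-bit displacement: e.g. "movq %rcx, 10000(%rsi)" encodes as 48 (REX.W)
+    ; 89 (mov r/m64, r64) 8e (mod=10, reg=rcx, rm=rsi) 00002710 (disp32 = 10000).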
+ + ; asm: movq %rcx, 10000(%rsi) + store v1, v2+10000 ; bin: 48 89 8e 00002710 + ; asm: movq %rsi, -10000(%rcx) + store v2, v1-10000 ; bin: 48 89 b1 ffffd8f0 + ; asm: movl %ecx, 10000(%rsi) + istore32 v1, v2+10000 ; bin: 40 89 8e 00002710 + ; asm: movl %esi, -10000(%rcx) + istore32 v2, v1-10000 ; bin: 40 89 b1 ffffd8f0 + ; asm: movw %cx, 10000(%rsi) + istore16 v1, v2+10000 ; bin: 66 40 89 8e 00002710 + ; asm: movw %si, -10000(%rcx) + istore16 v2, v1-10000 ; bin: 66 40 89 b1 ffffd8f0 + ; asm: movb %cl, 10000(%rsi) + istore8 v1, v2+10000 ; bin: 40 88 8e 00002710 + ; asm: movb %sil, 10000(%rcx) + istore8 v2, v1+10000 ; bin: 40 88 b1 00002710 + + ; asm: movq 50000(%rcx), %rdi + [-,%rdi] v160 = load.i64 v1+50000 ; bin: 48 8b b9 0000c350 + ; asm: movq -50000(%rsi), %rdx + [-,%rdx] v161 = load.i64 v2-50000 ; bin: 48 8b 96 ffff3cb0 + ; asm: movl 50000(%rcx), %edi + [-,%rdi] v162 = uload32.i64 v1+50000 ; bin: 40 8b b9 0000c350 + ; asm: movl -50000(%rsi), %edx + [-,%rdx] v163 = uload32.i64 v2-50000 ; bin: 40 8b 96 ffff3cb0 + ; asm: movslq 50000(%rcx), %rdi + [-,%rdi] v164 = sload32.i64 v1+50000 ; bin: 48 63 b9 0000c350 + ; asm: movslq -50000(%rsi), %rdx + [-,%rdx] v165 = sload32.i64 v2-50000 ; bin: 48 63 96 ffff3cb0 + ; asm: movzwq 50000(%rcx), %rdi + [-,%rdi] v166 = uload16.i64 v1+50000 ; bin: 48 0f b7 b9 0000c350 + ; asm: movzwq -50000(%rsi), %rdx + [-,%rdx] v167 = uload16.i64 v2-50000 ; bin: 48 0f b7 96 ffff3cb0 + ; asm: movswq 50000(%rcx), %rdi + [-,%rdi] v168 = sload16.i64 v1+50000 ; bin: 48 0f bf b9 0000c350 + ; asm: movswq -50000(%rsi), %rdx + [-,%rdx] v169 = sload16.i64 v2-50000 ; bin: 48 0f bf 96 ffff3cb0 + ; asm: movzbq 50000(%rcx), %rdi + [-,%rdi] v170 = uload8.i64 v1+50000 ; bin: 48 0f b6 b9 0000c350 + ; asm: movzbq -50000(%rsi), %rdx + [-,%rdx] v171 = uload8.i64 v2-50000 ; bin: 48 0f b6 96 ffff3cb0 + ; asm: movsbq 50000(%rcx), %rdi + [-,%rdi] v172 = sload8.i64 v1+50000 ; bin: 48 0f be b9 0000c350 + ; asm: movsbq -50000(%rsi), %rdx + [-,%rdx] v173 = sload8.i64 v2-50000 ; bin: 48 0f be 96 ffff3cb0 + + ; More arithmetic. 
; asm: imulq %rsi, %rcx - [-,%rcx] v120 = imul v1, v2 ; bin: 48 0f af ce + [-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce ; asm: imulq %r10, %rsi - [-,%rsi] v121 = imul v2, v3 ; bin: 49 0f af f2 + [-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2 ; asm: imulq %rcx, %r10 - [-,%r10] v122 = imul v3, v1 ; bin: 4c 0f af d1 + [-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1 - [-,%rax] v130 = iconst.i64 1 - [-,%rdx] v131 = iconst.i64 2 + [-,%rax] v190 = iconst.i64 1 + [-,%rdx] v191 = iconst.i64 2 ; asm: idivq %rcx - [-,%rax,%rdx] v132, v133 = x86_sdivmodx v130, v131, v1 ; bin: 48 f7 f9 + [-,%rax,%rdx] v192, v193 = x86_sdivmodx v130, v131, v1 ; bin: 48 f7 f9 ; asm: idivq %rsi - [-,%rax,%rdx] v134, v135 = x86_sdivmodx v130, v131, v2 ; bin: 48 f7 fe + [-,%rax,%rdx] v194, v195 = x86_sdivmodx v130, v131, v2 ; bin: 48 f7 fe ; asm: idivq %r10 - [-,%rax,%rdx] v136, v137 = x86_sdivmodx v130, v131, v3 ; bin: 49 f7 fa + [-,%rax,%rdx] v196, v197 = x86_sdivmodx v130, v131, v3 ; bin: 49 f7 fa ; asm: divq %rcx - [-,%rax,%rdx] v138, v139 = x86_udivmodx v130, v131, v1 ; bin: 48 f7 f1 + [-,%rax,%rdx] v198, v199 = x86_udivmodx v130, v131, v1 ; bin: 48 f7 f1 ; asm: divq %rsi - [-,%rax,%rdx] v140, v141 = x86_udivmodx v130, v131, v2 ; bin: 48 f7 f6 + [-,%rax,%rdx] v200, v201 = x86_udivmodx v130, v131, v2 ; bin: 48 f7 f6 ; asm: divq %r10 - [-,%rax,%rdx] v142, v143 = x86_udivmodx v130, v131, v3 ; bin: 49 f7 f2 + [-,%rax,%rdx] v202, v203 = x86_udivmodx v130, v131, v3 ; bin: 49 f7 f2 ; Bit-counting instructions. ; asm: popcntq %rsi, %rcx - [-,%rcx] v200 = popcnt v2 ; bin: f3 48 0f b8 ce + [-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce ; asm: popcntq %r10, %rsi - [-,%rsi] v201 = popcnt v3 ; bin: f3 49 0f b8 f2 + [-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2 ; asm: popcntq %rcx, %r10 - [-,%r10] v202 = popcnt v1 ; bin: f3 4c 0f b8 d1 + [-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1 ; asm: lzcntq %rsi, %rcx - [-,%rcx] v203 = clz v2 ; bin: f3 48 0f bd ce + [-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce ; asm: lzcntq %r10, %rsi - [-,%rsi] v204 = clz v3 ; bin: f3 49 0f bd f2 + [-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2 ; asm: lzcntq %rcx, %r10 - [-,%r10] v205 = clz v1 ; bin: f3 4c 0f bd d1 + [-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1 ; asm: tzcntq %rsi, %rcx - [-,%rcx] v206 = ctz v2 ; bin: f3 48 0f bc ce + [-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce ; asm: tzcntq %r10, %rsi - [-,%rsi] v207 = ctz v3 ; bin: f3 49 0f bc f2 + [-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2 ; asm: tzcntq %rcx, %r10 - [-,%r10] v208 = ctz v1 ; bin: f3 4c 0f bc d1 + [-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1 ; Integer comparisons. @@ -327,146 +474,217 @@ ebb0: ; asm: movl $0x88001122, %r14d [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 + ; Load/Store instructions. + + ; Register indirect addressing with no displacement. 
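+    ; For reference, these i32 tests expect the REX-prefixed encodings that the I64
+    ; target prefers by default, so a bare 40 REX byte (W/R/X/B all clear) precedes
+    ; each opcode: e.g. "movl (%rcx), %edi" is 40 (REX) 8b (mov r32, r/m32)
+    ; 39 (mod=00, reg=edi, rm=rcx).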
+ + ; asm: movl (%rcx), %edi + [-,%rdi] v10 = load.i32 v1 ; bin: 40 8b 39 + ; asm: movl (%rsi), %edx + [-,%rdx] v11 = load.i32 v2 ; bin: 40 8b 16 + ; asm: movzwl (%rcx), %edi + [-,%rdi] v12 = uload16.i32 v1 ; bin: 40 0f b7 39 + ; asm: movzwl (%rsi), %edx + [-,%rdx] v13 = uload16.i32 v2 ; bin: 40 0f b7 16 + ; asm: movswl (%rcx), %edi + [-,%rdi] v14 = sload16.i32 v1 ; bin: 40 0f bf 39 + ; asm: movswl (%rsi), %edx + [-,%rdx] v15 = sload16.i32 v2 ; bin: 40 0f bf 16 + ; asm: movzbl (%rcx), %edi + [-,%rdi] v16 = uload8.i32 v1 ; bin: 40 0f b6 39 + ; asm: movzbl (%rsi), %edx + [-,%rdx] v17 = uload8.i32 v2 ; bin: 40 0f b6 16 + ; asm: movsbl (%rcx), %edi + [-,%rdi] v18 = sload8.i32 v1 ; bin: 40 0f be 39 + ; asm: movsbl (%rsi), %edx + [-,%rdx] v19 = sload8.i32 v2 ; bin: 40 0f be 16 + + ; Register-indirect with 8-bit signed displacement. + + ; asm: movl 50(%rcx), %edi + [-,%rdi] v20 = load.i32 v1+50 ; bin: 40 8b 79 32 + ; asm: movl -50(%rsi), %edx + [-,%rdx] v21 = load.i32 v2-50 ; bin: 40 8b 56 ce + ; asm: movzwl 50(%rcx), %edi + [-,%rdi] v22 = uload16.i32 v1+50 ; bin: 40 0f b7 79 32 + ; asm: movzwl -50(%rsi), %edx + [-,%rdx] v23 = uload16.i32 v2-50 ; bin: 40 0f b7 56 ce + ; asm: movswl 50(%rcx), %edi + [-,%rdi] v24 = sload16.i32 v1+50 ; bin: 40 0f bf 79 32 + ; asm: movswl -50(%rsi), %edx + [-,%rdx] v25 = sload16.i32 v2-50 ; bin: 40 0f bf 56 ce + ; asm: movzbl 50(%rcx), %edi + [-,%rdi] v26 = uload8.i32 v1+50 ; bin: 40 0f b6 79 32 + ; asm: movzbl -50(%rsi), %edx + [-,%rdx] v27 = uload8.i32 v2-50 ; bin: 40 0f b6 56 ce + ; asm: movsbl 50(%rcx), %edi + [-,%rdi] v28 = sload8.i32 v1+50 ; bin: 40 0f be 79 32 + ; asm: movsbl -50(%rsi), %edx + [-,%rdx] v29 = sload8.i32 v2-50 ; bin: 40 0f be 56 ce + + ; Register-indirect with 32-bit signed displacement. + + ; asm: movl 50000(%rcx), %edi + [-,%rdi] v30 = load.i32 v1+50000 ; bin: 40 8b b9 0000c350 + ; asm: movl -50000(%rsi), %edx + [-,%rdx] v31 = load.i32 v2-50000 ; bin: 40 8b 96 ffff3cb0 + ; asm: movzwl 50000(%rcx), %edi + [-,%rdi] v32 = uload16.i32 v1+50000 ; bin: 40 0f b7 b9 0000c350 + ; asm: movzwl -50000(%rsi), %edx + [-,%rdx] v33 = uload16.i32 v2-50000 ; bin: 40 0f b7 96 ffff3cb0 + ; asm: movswl 50000(%rcx), %edi + [-,%rdi] v34 = sload16.i32 v1+50000 ; bin: 40 0f bf b9 0000c350 + ; asm: movswl -50000(%rsi), %edx + [-,%rdx] v35 = sload16.i32 v2-50000 ; bin: 40 0f bf 96 ffff3cb0 + ; asm: movzbl 50000(%rcx), %edi + [-,%rdi] v36 = uload8.i32 v1+50000 ; bin: 40 0f b6 b9 0000c350 + ; asm: movzbl -50000(%rsi), %edx + [-,%rdx] v37 = uload8.i32 v2-50000 ; bin: 40 0f b6 96 ffff3cb0 + ; asm: movsbl 50000(%rcx), %edi + [-,%rdi] v38 = sload8.i32 v1+50000 ; bin: 40 0f be b9 0000c350 + ; asm: movsbl -50000(%rsi), %edx + [-,%rdx] v39 = sload8.i32 v2-50000 ; bin: 40 0f be 96 ffff3cb0 + ; Integer Register-Register Operations. 
; asm: addl %esi, %ecx - [-,%rcx] v10 = iadd v1, v2 ; bin: 40 01 f1 + [-,%rcx] v40 = iadd v1, v2 ; bin: 40 01 f1 ; asm: addl %r10d, %esi - [-,%rsi] v11 = iadd v2, v3 ; bin: 44 01 d6 + [-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6 ; asm: addl %ecx, %r10d - [-,%r10] v12 = iadd v3, v1 ; bin: 41 01 ca + [-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca ; asm: subl %esi, %ecx - [-,%rcx] v20 = isub v1, v2 ; bin: 40 29 f1 + [-,%rcx] v50 = isub v1, v2 ; bin: 40 29 f1 ; asm: subl %r10d, %esi - [-,%rsi] v21 = isub v2, v3 ; bin: 44 29 d6 + [-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6 ; asm: subl %ecx, %r10d - [-,%r10] v22 = isub v3, v1 ; bin: 41 29 ca + [-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca ; asm: andl %esi, %ecx - [-,%rcx] v30 = band v1, v2 ; bin: 40 21 f1 + [-,%rcx] v60 = band v1, v2 ; bin: 40 21 f1 ; asm: andl %r10d, %esi - [-,%rsi] v31 = band v2, v3 ; bin: 44 21 d6 + [-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6 ; asm: andl %ecx, %r10d - [-,%r10] v32 = band v3, v1 ; bin: 41 21 ca + [-,%r10] v62 = band v3, v1 ; bin: 41 21 ca ; asm: orl %esi, %ecx - [-,%rcx] v40 = bor v1, v2 ; bin: 40 09 f1 + [-,%rcx] v70 = bor v1, v2 ; bin: 40 09 f1 ; asm: orl %r10d, %esi - [-,%rsi] v41 = bor v2, v3 ; bin: 44 09 d6 + [-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6 ; asm: orl %ecx, %r10d - [-,%r10] v42 = bor v3, v1 ; bin: 41 09 ca + [-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca ; asm: xorl %esi, %ecx - [-,%rcx] v50 = bxor v1, v2 ; bin: 40 31 f1 + [-,%rcx] v80 = bxor v1, v2 ; bin: 40 31 f1 ; asm: xorl %r10d, %esi - [-,%rsi] v51 = bxor v2, v3 ; bin: 44 31 d6 + [-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6 ; asm: xorl %ecx, %r10d - [-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca + [-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca ; asm: shll %cl, %esi - [-,%rsi] v60 = ishl v2, v1 ; bin: 40 d3 e6 + [-,%rsi] v90 = ishl v2, v1 ; bin: 40 d3 e6 ; asm: shll %cl, %r10d - [-,%r10] v61 = ishl v3, v1 ; bin: 41 d3 e2 + [-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2 ; asm: sarl %cl, %esi - [-,%rsi] v62 = sshr v2, v1 ; bin: 40 d3 fe + [-,%rsi] v92 = sshr v2, v1 ; bin: 40 d3 fe ; asm: sarl %cl, %r10d - [-,%r10] v63 = sshr v3, v1 ; bin: 41 d3 fa + [-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa ; asm: shrl %cl, %esi - [-,%rsi] v64 = ushr v2, v1 ; bin: 40 d3 ee + [-,%rsi] v94 = ushr v2, v1 ; bin: 40 d3 ee ; asm: shrl %cl, %r10d - [-,%r10] v65 = ushr v3, v1 ; bin: 41 d3 ea + [-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea ; asm: roll %cl, %esi - [-,%rsi] v66 = rotl v2, v1 ; bin: 40 d3 c6 + [-,%rsi] v96 = rotl v2, v1 ; bin: 40 d3 c6 ; asm: roll %cl, %r10d - [-,%r10] v67 = rotl v3, v1 ; bin: 41 d3 c2 + [-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2 ; asm: rorl %cl, %esi - [-,%rsi] v68 = rotr v2, v1 ; bin: 40 d3 ce + [-,%rsi] v98 = rotr v2, v1 ; bin: 40 d3 ce ; asm: rorl %cl, %r10d - [-,%r10] v69 = rotr v3, v1 ; bin: 41 d3 ca + [-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca ; Integer Register-Immediate Operations. ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. ; Some take 8-bit immediates that are sign-extended to 64 bits. 
; asm: addl $-100000, %ecx - [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960 + [-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960 ; asm: addl $100000, %esi - [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0 + [-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0 ; asm: addl $0x7fffffff, %r10d - [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff + [-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff ; asm: addl $100, %r8d - [-,%r8] v73 = iadd_imm v4, 100 ; bin: 41 83 c0 64 + [-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64 ; asm: addl $-100, %r14d - [-,%r14] v74 = iadd_imm v5, -100 ; bin: 41 83 c6 9c + [-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c ; asm: andl $-100000, %ecx - [-,%rcx] v80 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960 + [-,%rcx] v110 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960 ; asm: andl $100000, %esi - [-,%rsi] v81 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0 + [-,%rsi] v111 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0 ; asm: andl $0x7fffffff, %r10d - [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff + [-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff ; asm: andl $100, %r8d - [-,%r8] v83 = band_imm v4, 100 ; bin: 41 83 e0 64 + [-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64 ; asm: andl $-100, %r14d - [-,%r14] v84 = band_imm v5, -100 ; bin: 41 83 e6 9c + [-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c ; asm: orl $-100000, %ecx - [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960 + [-,%rcx] v120 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960 ; asm: orl $100000, %esi - [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0 + [-,%rsi] v121 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0 ; asm: orl $0x7fffffff, %r10d - [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff + [-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff ; asm: orl $100, %r8d - [-,%r8] v93 = bor_imm v4, 100 ; bin: 41 83 c8 64 + [-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64 ; asm: orl $-100, %r14d - [-,%r14] v94 = bor_imm v5, -100 ; bin: 41 83 ce 9c + [-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c ; asm: ret ; asm: xorl $-100000, %ecx - [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960 + [-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960 ; asm: xorl $100000, %esi - [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0 + [-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0 ; asm: xorl $0x7fffffff, %r10d - [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff + [-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff ; asm: xorl $100, %r8d - [-,%r8] v103 = bxor_imm v4, 100 ; bin: 41 83 f0 64 + [-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64 ; asm: xorl $-100, %r14d - [-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c + [-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c ; Register copies. ; asm: movl %esi, %ecx - [-,%rcx] v110 = copy v2 ; bin: 40 89 f1 + [-,%rcx] v140 = copy v2 ; bin: 40 89 f1 ; asm: movl %r10d, %esi - [-,%rsi] v111 = copy v3 ; bin: 44 89 d6 + [-,%rsi] v141 = copy v3 ; bin: 44 89 d6 ; asm: movl %ecx, %r10d - [-,%r10] v112 = copy v1 ; bin: 41 89 ca + [-,%r10] v142 = copy v1 ; bin: 41 89 ca ; More arithmetic. 
; asm: imull %esi, %ecx - [-,%rcx] v120 = imul v1, v2 ; bin: 40 0f af ce + [-,%rcx] v150 = imul v1, v2 ; bin: 40 0f af ce ; asm: imull %r10d, %esi - [-,%rsi] v121 = imul v2, v3 ; bin: 41 0f af f2 + [-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2 ; asm: imull %ecx, %r10d - [-,%r10] v122 = imul v3, v1 ; bin: 44 0f af d1 + [-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1 - [-,%rax] v130 = iconst.i32 1 - [-,%rdx] v131 = iconst.i32 2 + [-,%rax] v160 = iconst.i32 1 + [-,%rdx] v161 = iconst.i32 2 ; asm: idivl %ecx - [-,%rax,%rdx] v132, v133 = x86_sdivmodx v130, v131, v1 ; bin: 40 f7 f9 + [-,%rax,%rdx] v162, v163 = x86_sdivmodx v130, v131, v1 ; bin: 40 f7 f9 ; asm: idivl %esi - [-,%rax,%rdx] v134, v135 = x86_sdivmodx v130, v131, v2 ; bin: 40 f7 fe + [-,%rax,%rdx] v164, v165 = x86_sdivmodx v130, v131, v2 ; bin: 40 f7 fe ; asm: idivl %r10d - [-,%rax,%rdx] v136, v137 = x86_sdivmodx v130, v131, v3 ; bin: 41 f7 fa + [-,%rax,%rdx] v166, v167 = x86_sdivmodx v130, v131, v3 ; bin: 41 f7 fa ; asm: divl %ecx - [-,%rax,%rdx] v138, v139 = x86_udivmodx v130, v131, v1 ; bin: 40 f7 f1 + [-,%rax,%rdx] v168, v169 = x86_udivmodx v130, v131, v1 ; bin: 40 f7 f1 ; asm: divl %esi - [-,%rax,%rdx] v140, v141 = x86_udivmodx v130, v131, v2 ; bin: 40 f7 f6 + [-,%rax,%rdx] v170, v171 = x86_udivmodx v130, v131, v2 ; bin: 40 f7 f6 ; asm: divl %r10d - [-,%rax,%rdx] v142, v143 = x86_udivmodx v130, v131, v3 ; bin: 41 f7 f2 + [-,%rax,%rdx] v172, v173 = x86_udivmodx v130, v131, v3 ; bin: 41 f7 f2 ; Bit-counting instructions. diff --git a/lib/cretonne/meta/base/instructions.py b/lib/cretonne/meta/base/instructions.py index a26d791719..3a4ce25cda 100644 --- a/lib/cretonne/meta/base/instructions.py +++ b/lib/cretonne/meta/base/instructions.py @@ -271,7 +271,7 @@ istore16 = Instruction( 'istore16', r""" Store the low 16 bits of ``x`` to memory at ``p + Offset``. - This is equivalent to ``ireduce.i16`` followed by ``store.i8``. + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. """, ins=(Flags, x, p, Offset), can_store=True) @@ -301,7 +301,7 @@ istore32 = Instruction( 'istore32', r""" Store the low 32 bits of ``x`` to memory at ``p + Offset``. - This is equivalent to ``ireduce.i32`` followed by ``store.i8``. + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. """, ins=(Flags, x, p, Offset), can_store=True) diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 448252f43a..39b6812058 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -55,6 +55,28 @@ def enc_i32_i64(inst, recipe, *args, **kwargs): I64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs)) +def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs): + # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None + """ + Add encodings for `inst.i32` to I32. + Add encodings for `inst.i32` to I64 with and without REX. + Add encodings for `inst.i64` to I64 with a REX prefix, using the `w_bit` + argument to determine whether or not to set the REX.W bit. + """ + I32.enc(inst.i32.any, *recipe(*args, **kwargs)) + + # REX-less encoding must come after REX encoding so we don't use it by + # default. Otherwise reg-alloc would never use r8 and up.
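+ # For example, the i32 load/store tests in binary64.cton expect the REX form
+ # ("40 8b ..."), since the encoding listed first is the one used by default.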
+ I64.enc(inst.i32.any, *recipe.rex(*args, **kwargs)) + I64.enc(inst.i32.any, *recipe(*args, **kwargs)) + + if w_bit: + I64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs)) + else: + I64.enc(inst.i64.any, *recipe.rex(*args, **kwargs)) + I64.enc(inst.i64.any, *recipe(*args, **kwargs)) + + def enc_flt(inst, recipe, *args, **kwargs): # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None """ @@ -142,38 +164,60 @@ I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1), I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) +# # Loads and stores. -I32.enc(base.store.i32.any, *r.st(0x89)) -I32.enc(base.store.i32.any, *r.stDisp8(0x89)) -I32.enc(base.store.i32.any, *r.stDisp32(0x89)) +# +enc_i32_i64_ld_st(base.store, True, r.st, 0x89) +enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89) +enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89) -I32.enc(base.istore16.i32.any, *r.st(0x66, 0x89)) -I32.enc(base.istore16.i32.any, *r.stDisp8(0x66, 0x89)) -I32.enc(base.istore16.i32.any, *r.stDisp32(0x66, 0x89)) +I64.enc(base.istore32.i64.any, *r.st.rex(0x89)) +I64.enc(base.istore32.i64.any, *r.stDisp8.rex(0x89)) +I64.enc(base.istore32.i64.any, *r.stDisp32.rex(0x89)) +enc_i32_i64_ld_st(base.istore16, False, r.st, 0x66, 0x89) +enc_i32_i64_ld_st(base.istore16, False, r.stDisp8, 0x66, 0x89) +enc_i32_i64_ld_st(base.istore16, False, r.stDisp32, 0x66, 0x89) + +# Byte stores are more complicated because the registers they can address +# depend on the presence of a REX prefix. I32.enc(base.istore8.i32.any, *r.st_abcd(0x88)) +I64.enc(base.istore8.i32.any, *r.st_abcd(0x88)) +I64.enc(base.istore8.i64.any, *r.st.rex(0x88)) I32.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88)) +I64.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88)) +I64.enc(base.istore8.i64.any, *r.stDisp8.rex(0x88)) I32.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88)) +I64.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88)) +I64.enc(base.istore8.i64.any, *r.stDisp32.rex(0x88)) -I32.enc(base.load.i32.any, *r.ld(0x8b)) -I32.enc(base.load.i32.any, *r.ldDisp8(0x8b)) -I32.enc(base.load.i32.any, *r.ldDisp32(0x8b)) +enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b) +enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b) +enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b) -I32.enc(base.uload16.i32.any, *r.ld(0x0f, 0xb7)) -I32.enc(base.uload16.i32.any, *r.ldDisp8(0x0f, 0xb7)) -I32.enc(base.uload16.i32.any, *r.ldDisp32(0x0f, 0xb7)) +I64.enc(base.uload32.i64, *r.ld.rex(0x8b)) +I64.enc(base.uload32.i64, *r.ldDisp8.rex(0x8b)) +I64.enc(base.uload32.i64, *r.ldDisp32.rex(0x8b)) -I32.enc(base.sload16.i32.any, *r.ld(0x0f, 0xbf)) -I32.enc(base.sload16.i32.any, *r.ldDisp8(0x0f, 0xbf)) -I32.enc(base.sload16.i32.any, *r.ldDisp32(0x0f, 0xbf)) +I64.enc(base.sload32.i64, *r.ld.rex(0x63, w=1)) +I64.enc(base.sload32.i64, *r.ldDisp8.rex(0x63, w=1)) +I64.enc(base.sload32.i64, *r.ldDisp32.rex(0x63, w=1)) -I32.enc(base.uload8.i32.any, *r.ld(0x0f, 0xb6)) -I32.enc(base.uload8.i32.any, *r.ldDisp8(0x0f, 0xb6)) -I32.enc(base.uload8.i32.any, *r.ldDisp32(0x0f, 0xb6)) +enc_i32_i64_ld_st(base.uload16, True, r.ld, 0x0f, 0xb7) +enc_i32_i64_ld_st(base.uload16, True, r.ldDisp8, 0x0f, 0xb7) +enc_i32_i64_ld_st(base.uload16, True, r.ldDisp32, 0x0f, 0xb7) -I32.enc(base.sload8.i32.any, *r.ld(0x0f, 0xbe)) -I32.enc(base.sload8.i32.any, *r.ldDisp8(0x0f, 0xbe)) -I32.enc(base.sload8.i32.any, *r.ldDisp32(0x0f, 0xbe)) +enc_i32_i64_ld_st(base.sload16, True, r.ld, 0x0f, 0xbf) +enc_i32_i64_ld_st(base.sload16, True, r.ldDisp8,
0x0f, 0xbf) +enc_i32_i64_ld_st(base.sload16, True, r.ldDisp32, 0x0f, 0xbf) + +enc_i32_i64_ld_st(base.uload8, True, r.ld, 0x0f, 0xb6) +enc_i32_i64_ld_st(base.uload8, True, r.ldDisp8, 0x0f, 0xb6) +enc_i32_i64_ld_st(base.uload8, True, r.ldDisp32, 0x0f, 0xb6) + +enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe) +enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe) +enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe) # # Call/return diff --git a/lib/cretonne/src/isa/intel/binemit.rs b/lib/cretonne/src/isa/intel/binemit.rs index 0f3b1b76c6..0eb6199e66 100644 --- a/lib/cretonne/src/isa/intel/binemit.rs +++ b/lib/cretonne/src/isa/intel/binemit.rs @@ -114,6 +114,15 @@ fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { sink.put1(bits as u8); } +// Emit single-byte opcode with mandatory prefix and REX. +fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*"); + let pp = (bits >> 8) & 3; + sink.put1(PREFIX[(pp - 1) as usize]); + rex_prefix(bits, rex, sink); + sink.put1(bits as u8); +} + /// Emit a ModR/M byte for reg-reg operands. fn modrm_rr(rm: RegUnit, reg: RegUnit, sink: &mut CS) { let reg = reg as u8 & 7;