Added Intel x86-64 encodings for 64bit loads and store instructions (#127)

* Added Intel x86-64 encodings for 64bit loads and store instructions

* Using GPR registers instead of ABCD for istore8 with REX prefix
Fixed testing of 64bit intel encoding

* Emit REX and REX-less encodings for optional REX prefix
Value renumbering in binary64.cton
This commit is contained in:
Denis Merigoux
2017-07-31 14:52:39 -07:00
committed by Jakob Stoklund Olesen
parent be8331d0a0
commit b74723cb68
4 changed files with 373 additions and 102 deletions

View File

@@ -145,52 +145,199 @@ ebb0:
; asm: movq %rcx, %r10
[-,%r10] v112 = copy v1 ; bin: 49 89 ca
; Load/Store instructions.
; Register indirect addressing with no displacement.
; asm: movq %rcx, (%rsi)
store v1, v2 ; bin: 48 89 0e
; asm: movq %rsi, (%rcx)
store v2, v1 ; bin: 48 89 31
; asm: movl %ecx, (%rsi)
istore32 v1, v2 ; bin: 40 89 0e
; asm: movl %esi, (%rcx)
istore32 v2, v1 ; bin: 40 89 31
; asm: movw %cx, (%rsi)
istore16 v1, v2 ; bin: 66 40 89 0e
; asm: movw %si, (%rcx)
istore16 v2, v1 ; bin: 66 40 89 31
; asm: movb %cl, (%rsi)
istore8 v1, v2 ; bin: 40 88 0e
; asm: movb %sil, (%rcx)
istore8 v2, v1 ; bin: 40 88 31
; asm: movq (%rcx), %rdi
[-,%rdi] v120 = load.i64 v1 ; bin: 48 8b 39
; asm: movq (%rsi), %rdx
[-,%rdx] v121 = load.i64 v2 ; bin: 48 8b 16
; asm: movl (%rcx), %edi
[-,%rdi] v122 = uload32.i64 v1 ; bin: 40 8b 39
; asm: movl (%rsi), %edx
[-,%rdx] v123 = uload32.i64 v2 ; bin: 40 8b 16
; asm: movslq (%rcx), %rdi
[-,%rdi] v124 = sload32.i64 v1 ; bin: 48 63 39
; asm: movslq (%rsi), %rdx
[-,%rdx] v125 = sload32.i64 v2 ; bin: 48 63 16
; asm: movzwq (%rcx), %rdi
[-,%rdi] v126 = uload16.i64 v1 ; bin: 48 0f b7 39
; asm: movzwq (%rsi), %rdx
[-,%rdx] v127 = uload16.i64 v2 ; bin: 48 0f b7 16
; asm: movswq (%rcx), %rdi
[-,%rdi] v128 = sload16.i64 v1 ; bin: 48 0f bf 39
; asm: movswq (%rsi), %rdx
[-,%rdx] v129 = sload16.i64 v2 ; bin: 48 0f bf 16
; asm: movzbq (%rcx), %rdi
[-,%rdi] v130 = uload8.i64 v1 ; bin: 48 0f b6 39
; asm: movzbq (%rsi), %rdx
[-,%rdx] v131 = uload8.i64 v2 ; bin: 48 0f b6 16
; asm: movsbq (%rcx), %rdi
[-,%rdi] v132 = sload8.i64 v1 ; bin: 48 0f be 39
; asm: movsbq (%rsi), %rdx
[-,%rdx] v133 = sload8.i64 v2 ; bin: 48 0f be 16
; Register-indirect with 8-bit signed displacement.
; asm: movq %rcx, 100(%rsi)
store v1, v2+100 ; bin: 48 89 4e 64
; asm: movq %rsi, -100(%rcx)
store v2, v1-100 ; bin: 48 89 71 9c
; asm: movl %ecx, 100(%rsi)
istore32 v1, v2+100 ; bin: 40 89 4e 64
; asm: movl %esi, -100(%rcx)
istore32 v2, v1-100 ; bin: 40 89 71 9c
; asm: movw %cx, 100(%rsi)
istore16 v1, v2+100 ; bin: 66 40 89 4e 64
; asm: movw %si, -100(%rcx)
istore16 v2, v1-100 ; bin: 66 40 89 71 9c
; asm: movb %cl, 100(%rsi)
istore8 v1, v2+100 ; bin: 40 88 4e 64
; asm: movb %sil, 100(%rcx)
istore8 v2, v1+100 ; bin: 40 88 71 64
; asm: movq 50(%rcx), %rdi
[-,%rdi] v140 = load.i64 v1+50 ; bin: 48 8b 79 32
; asm: movq -50(%rsi), %rdx
[-,%rdx] v141 = load.i64 v2-50 ; bin: 48 8b 56 ce
; asm: movl 50(%rcx), %edi
[-,%rdi] v142 = uload32.i64 v1+50 ; bin: 40 8b 79 32
; asm: movl -50(%rsi), %edx
[-,%rdx] v143 = uload32.i64 v2-50 ; bin: 40 8b 56 ce
; asm: movslq 50(%rcx), %rdi
[-,%rdi] v144 = sload32.i64 v1+50 ; bin: 48 63 79 32
; asm: movslq -50(%rsi), %rdx
[-,%rdx] v145 = sload32.i64 v2-50 ; bin: 48 63 56 ce
; asm: movzwq 50(%rcx), %rdi
[-,%rdi] v146 = uload16.i64 v1+50 ; bin: 48 0f b7 79 32
; asm: movzwq -50(%rsi), %rdx
[-,%rdx] v147 = uload16.i64 v2-50 ; bin: 48 0f b7 56 ce
; asm: movswq 50(%rcx), %rdi
[-,%rdi] v148 = sload16.i64 v1+50 ; bin: 48 0f bf 79 32
; asm: movswq -50(%rsi), %rdx
[-,%rdx] v149 = sload16.i64 v2-50 ; bin: 48 0f bf 56 ce
; asm: movzbq 50(%rcx), %rdi
[-,%rdi] v150 = uload8.i64 v1+50 ; bin: 48 0f b6 79 32
; asm: movzbq -50(%rsi), %rdx
[-,%rdx] v151 = uload8.i64 v2-50 ; bin: 48 0f b6 56 ce
; asm: movsbq 50(%rcx), %rdi
[-,%rdi] v152 = sload8.i64 v1+50 ; bin: 48 0f be 79 32
; asm: movsbq -50(%rsi), %rdx
[-,%rdx] v153 = sload8.i64 v2-50 ; bin: 48 0f be 56 ce
; Register-indirect with 32-bit signed displacement.
; asm: movq %rcx, 10000(%rsi)
store v1, v2+10000 ; bin: 48 89 8e 00002710
; asm: movq %rsi, -10000(%rcx)
store v2, v1-10000 ; bin: 48 89 b1 ffffd8f0
; asm: movl %ecx, 10000(%rsi)
istore32 v1, v2+10000 ; bin: 40 89 8e 00002710
; asm: movl %esi, -10000(%rcx)
istore32 v2, v1-10000 ; bin: 40 89 b1 ffffd8f0
; asm: movw %cx, 10000(%rsi)
istore16 v1, v2+10000 ; bin: 66 40 89 8e 00002710
; asm: movw %si, -10000(%rcx)
istore16 v2, v1-10000 ; bin: 66 40 89 b1 ffffd8f0
; asm: movb %cl, 10000(%rsi)
istore8 v1, v2+10000 ; bin: 40 88 8e 00002710
; asm: movb %sil, 10000(%rcx)
istore8 v2, v1+10000 ; bin: 40 88 b1 00002710
; asm: movq 50000(%rcx), %rdi
[-,%rdi] v160 = load.i64 v1+50000 ; bin: 48 8b b9 0000c350
; asm: movq -50000(%rsi), %rdx
[-,%rdx] v161 = load.i64 v2-50000 ; bin: 48 8b 96 ffff3cb0
; asm: movl 50000(%rcx), %edi
[-,%rdi] v162 = uload32.i64 v1+50000 ; bin: 40 8b b9 0000c350
; asm: movl -50000(%rsi), %edx
[-,%rdx] v163 = uload32.i64 v2-50000 ; bin: 40 8b 96 ffff3cb0
; asm: movslq 50000(%rcx), %rdi
[-,%rdi] v164 = sload32.i64 v1+50000 ; bin: 48 63 b9 0000c350
; asm: movslq -50000(%rsi), %rdx
[-,%rdx] v165 = sload32.i64 v2-50000 ; bin: 48 63 96 ffff3cb0
; asm: movzwq 50000(%rcx), %rdi
[-,%rdi] v166 = uload16.i64 v1+50000 ; bin: 48 0f b7 b9 0000c350
; asm: movzwq -50000(%rsi), %rdx
[-,%rdx] v167 = uload16.i64 v2-50000 ; bin: 48 0f b7 96 ffff3cb0
; asm: movswq 50000(%rcx), %rdi
[-,%rdi] v168 = sload16.i64 v1+50000 ; bin: 48 0f bf b9 0000c350
; asm: movswq -50000(%rsi), %rdx
[-,%rdx] v169 = sload16.i64 v2-50000 ; bin: 48 0f bf 96 ffff3cb0
; asm: movzbq 50000(%rcx), %rdi
[-,%rdi] v170 = uload8.i64 v1+50000 ; bin: 48 0f b6 b9 0000c350
; asm: movzbq -50000(%rsi), %rdx
[-,%rdx] v171 = uload8.i64 v2-50000 ; bin: 48 0f b6 96 ffff3cb0
; asm: movsbq 50000(%rcx), %rdi
[-,%rdi] v172 = sload8.i64 v1+50000 ; bin: 48 0f be b9 0000c350
; asm: movsbq -50000(%rsi), %rdx
[-,%rdx] v173 = sload8.i64 v2-50000 ; bin: 48 0f be 96 ffff3cb0
; More arithmetic.
; asm: imulq %rsi, %rcx
[-,%rcx] v120 = imul v1, v2 ; bin: 48 0f af ce
[-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce
; asm: imulq %r10, %rsi
[-,%rsi] v121 = imul v2, v3 ; bin: 49 0f af f2
[-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2
; asm: imulq %rcx, %r10
[-,%r10] v122 = imul v3, v1 ; bin: 4c 0f af d1
[-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1
[-,%rax] v130 = iconst.i64 1
[-,%rdx] v131 = iconst.i64 2
[-,%rax] v190 = iconst.i64 1
[-,%rdx] v191 = iconst.i64 2
; asm: idivq %rcx
[-,%rax,%rdx] v132, v133 = x86_sdivmodx v130, v131, v1 ; bin: 48 f7 f9
[-,%rax,%rdx] v192, v193 = x86_sdivmodx v130, v131, v1 ; bin: 48 f7 f9
; asm: idivq %rsi
[-,%rax,%rdx] v134, v135 = x86_sdivmodx v130, v131, v2 ; bin: 48 f7 fe
[-,%rax,%rdx] v194, v195 = x86_sdivmodx v130, v131, v2 ; bin: 48 f7 fe
; asm: idivq %r10
[-,%rax,%rdx] v136, v137 = x86_sdivmodx v130, v131, v3 ; bin: 49 f7 fa
[-,%rax,%rdx] v196, v197 = x86_sdivmodx v130, v131, v3 ; bin: 49 f7 fa
; asm: divq %rcx
[-,%rax,%rdx] v138, v139 = x86_udivmodx v130, v131, v1 ; bin: 48 f7 f1
[-,%rax,%rdx] v198, v199 = x86_udivmodx v130, v131, v1 ; bin: 48 f7 f1
; asm: divq %rsi
[-,%rax,%rdx] v140, v141 = x86_udivmodx v130, v131, v2 ; bin: 48 f7 f6
[-,%rax,%rdx] v200, v201 = x86_udivmodx v130, v131, v2 ; bin: 48 f7 f6
; asm: divq %r10
[-,%rax,%rdx] v142, v143 = x86_udivmodx v130, v131, v3 ; bin: 49 f7 f2
[-,%rax,%rdx] v202, v203 = x86_udivmodx v130, v131, v3 ; bin: 49 f7 f2
; Bit-counting instructions.
; asm: popcntq %rsi, %rcx
[-,%rcx] v200 = popcnt v2 ; bin: f3 48 0f b8 ce
[-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce
; asm: popcntq %r10, %rsi
[-,%rsi] v201 = popcnt v3 ; bin: f3 49 0f b8 f2
[-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2
; asm: popcntq %rcx, %r10
[-,%r10] v202 = popcnt v1 ; bin: f3 4c 0f b8 d1
[-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1
; asm: lzcntq %rsi, %rcx
[-,%rcx] v203 = clz v2 ; bin: f3 48 0f bd ce
[-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce
; asm: lzcntq %r10, %rsi
[-,%rsi] v204 = clz v3 ; bin: f3 49 0f bd f2
[-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2
; asm: lzcntq %rcx, %r10
[-,%r10] v205 = clz v1 ; bin: f3 4c 0f bd d1
[-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1
; asm: tzcntq %rsi, %rcx
[-,%rcx] v206 = ctz v2 ; bin: f3 48 0f bc ce
[-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce
; asm: tzcntq %r10, %rsi
[-,%rsi] v207 = ctz v3 ; bin: f3 49 0f bc f2
[-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2
; asm: tzcntq %rcx, %r10
[-,%r10] v208 = ctz v1 ; bin: f3 4c 0f bc d1
[-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1
; Integer comparisons.
@@ -327,146 +474,217 @@ ebb0:
; asm: movl $0x88001122, %r14d
[-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
; Load/Store instructions.
; Register indirect addressing with no displacement.
; asm: movl (%rcx), %edi
[-,%rdi] v10 = load.i32 v1 ; bin: 40 8b 39
; asm: movl (%rsi), %edx
[-,%rdx] v11 = load.i32 v2 ; bin: 40 8b 16
; asm: movzwl (%rcx), %edi
[-,%rdi] v12 = uload16.i32 v1 ; bin: 40 0f b7 39
; asm: movzwl (%rsi), %edx
[-,%rdx] v13 = uload16.i32 v2 ; bin: 40 0f b7 16
; asm: movswl (%rcx), %edi
[-,%rdi] v14 = sload16.i32 v1 ; bin: 40 0f bf 39
; asm: movswl (%rsi), %edx
[-,%rdx] v15 = sload16.i32 v2 ; bin: 40 0f bf 16
; asm: movzbl (%rcx), %edi
[-,%rdi] v16 = uload8.i32 v1 ; bin: 40 0f b6 39
; asm: movzbl (%rsi), %edx
[-,%rdx] v17 = uload8.i32 v2 ; bin: 40 0f b6 16
; asm: movsbl (%rcx), %edi
[-,%rdi] v18 = sload8.i32 v1 ; bin: 40 0f be 39
; asm: movsbl (%rsi), %edx
[-,%rdx] v19 = sload8.i32 v2 ; bin: 40 0f be 16
; Register-indirect with 8-bit signed displacement.
; asm: movl 50(%rcx), %edi
[-,%rdi] v20 = load.i32 v1+50 ; bin: 40 8b 79 32
; asm: movl -50(%rsi), %edx
[-,%rdx] v21 = load.i32 v2-50 ; bin: 40 8b 56 ce
; asm: movzwl 50(%rcx), %edi
[-,%rdi] v22 = uload16.i32 v1+50 ; bin: 40 0f b7 79 32
; asm: movzwl -50(%rsi), %edx
[-,%rdx] v23 = uload16.i32 v2-50 ; bin: 40 0f b7 56 ce
; asm: movswl 50(%rcx), %edi
[-,%rdi] v24 = sload16.i32 v1+50 ; bin: 40 0f bf 79 32
; asm: movswl -50(%rsi), %edx
[-,%rdx] v25 = sload16.i32 v2-50 ; bin: 40 0f bf 56 ce
; asm: movzbl 50(%rcx), %edi
[-,%rdi] v26 = uload8.i32 v1+50 ; bin: 40 0f b6 79 32
; asm: movzbl -50(%rsi), %edx
[-,%rdx] v27 = uload8.i32 v2-50 ; bin: 40 0f b6 56 ce
; asm: movsbl 50(%rcx), %edi
[-,%rdi] v28 = sload8.i32 v1+50 ; bin: 40 0f be 79 32
; asm: movsbl -50(%rsi), %edx
[-,%rdx] v29 = sload8.i32 v2-50 ; bin: 40 0f be 56 ce
; Register-indirect with 32-bit signed displacement.
; asm: movl 50000(%rcx), %edi
[-,%rdi] v30 = load.i32 v1+50000 ; bin: 40 8b b9 0000c350
; asm: movl -50000(%rsi), %edx
[-,%rdx] v31 = load.i32 v2-50000 ; bin: 40 8b 96 ffff3cb0
; asm: movzwl 50000(%rcx), %edi
[-,%rdi] v32 = uload16.i32 v1+50000 ; bin: 40 0f b7 b9 0000c350
; asm: movzwl -50000(%rsi), %edx
[-,%rdx] v33 = uload16.i32 v2-50000 ; bin: 40 0f b7 96 ffff3cb0
; asm: movswl 50000(%rcx), %edi
[-,%rdi] v34 = sload16.i32 v1+50000 ; bin: 40 0f bf b9 0000c350
; asm: movswl -50000(%rsi), %edx
[-,%rdx] v35 = sload16.i32 v2-50000 ; bin: 40 0f bf 96 ffff3cb0
; asm: movzbl 50000(%rcx), %edi
[-,%rdi] v36 = uload8.i32 v1+50000 ; bin: 40 0f b6 b9 0000c350
; asm: movzbl -50000(%rsi), %edx
[-,%rdx] v37 = uload8.i32 v2-50000 ; bin: 40 0f b6 96 ffff3cb0
; asm: movsbl 50000(%rcx), %edi
[-,%rdi] v38 = sload8.i32 v1+50000 ; bin: 40 0f be b9 0000c350
; asm: movsbl -50000(%rsi), %edx
[-,%rdx] v39 = sload8.i32 v2-50000 ; bin: 40 0f be 96 ffff3cb0
; Integer Register-Register Operations.
; asm: addl %esi, %ecx
[-,%rcx] v10 = iadd v1, v2 ; bin: 40 01 f1
[-,%rcx] v40 = iadd v1, v2 ; bin: 40 01 f1
; asm: addl %r10d, %esi
[-,%rsi] v11 = iadd v2, v3 ; bin: 44 01 d6
[-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6
; asm: addl %ecx, %r10d
[-,%r10] v12 = iadd v3, v1 ; bin: 41 01 ca
[-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca
; asm: subl %esi, %ecx
[-,%rcx] v20 = isub v1, v2 ; bin: 40 29 f1
[-,%rcx] v50 = isub v1, v2 ; bin: 40 29 f1
; asm: subl %r10d, %esi
[-,%rsi] v21 = isub v2, v3 ; bin: 44 29 d6
[-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6
; asm: subl %ecx, %r10d
[-,%r10] v22 = isub v3, v1 ; bin: 41 29 ca
[-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca
; asm: andl %esi, %ecx
[-,%rcx] v30 = band v1, v2 ; bin: 40 21 f1
[-,%rcx] v60 = band v1, v2 ; bin: 40 21 f1
; asm: andl %r10d, %esi
[-,%rsi] v31 = band v2, v3 ; bin: 44 21 d6
[-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6
; asm: andl %ecx, %r10d
[-,%r10] v32 = band v3, v1 ; bin: 41 21 ca
[-,%r10] v62 = band v3, v1 ; bin: 41 21 ca
; asm: orl %esi, %ecx
[-,%rcx] v40 = bor v1, v2 ; bin: 40 09 f1
[-,%rcx] v70 = bor v1, v2 ; bin: 40 09 f1
; asm: orl %r10d, %esi
[-,%rsi] v41 = bor v2, v3 ; bin: 44 09 d6
[-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6
; asm: orl %ecx, %r10d
[-,%r10] v42 = bor v3, v1 ; bin: 41 09 ca
[-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca
; asm: xorl %esi, %ecx
[-,%rcx] v50 = bxor v1, v2 ; bin: 40 31 f1
[-,%rcx] v80 = bxor v1, v2 ; bin: 40 31 f1
; asm: xorl %r10d, %esi
[-,%rsi] v51 = bxor v2, v3 ; bin: 44 31 d6
[-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6
; asm: xorl %ecx, %r10d
[-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca
[-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca
; asm: shll %cl, %esi
[-,%rsi] v60 = ishl v2, v1 ; bin: 40 d3 e6
[-,%rsi] v90 = ishl v2, v1 ; bin: 40 d3 e6
; asm: shll %cl, %r10d
[-,%r10] v61 = ishl v3, v1 ; bin: 41 d3 e2
[-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2
; asm: sarl %cl, %esi
[-,%rsi] v62 = sshr v2, v1 ; bin: 40 d3 fe
[-,%rsi] v92 = sshr v2, v1 ; bin: 40 d3 fe
; asm: sarl %cl, %r10d
[-,%r10] v63 = sshr v3, v1 ; bin: 41 d3 fa
[-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa
; asm: shrl %cl, %esi
[-,%rsi] v64 = ushr v2, v1 ; bin: 40 d3 ee
[-,%rsi] v94 = ushr v2, v1 ; bin: 40 d3 ee
; asm: shrl %cl, %r10d
[-,%r10] v65 = ushr v3, v1 ; bin: 41 d3 ea
[-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea
; asm: roll %cl, %esi
[-,%rsi] v66 = rotl v2, v1 ; bin: 40 d3 c6
[-,%rsi] v96 = rotl v2, v1 ; bin: 40 d3 c6
; asm: roll %cl, %r10d
[-,%r10] v67 = rotl v3, v1 ; bin: 41 d3 c2
[-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2
; asm: rorl %cl, %esi
[-,%rsi] v68 = rotr v2, v1 ; bin: 40 d3 ce
[-,%rsi] v98 = rotr v2, v1 ; bin: 40 d3 ce
; asm: rorl %cl, %r10d
[-,%r10] v69 = rotr v3, v1 ; bin: 41 d3 ca
[-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca
; Integer Register-Immediate Operations.
; These 32-bit ops all use a 32-bit immediate.
; Some take 8-bit immediates that are sign-extended to 32 bits.
; asm: addl $-100000, %ecx
[-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960
[-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960
; asm: addl $100000, %esi
[-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0
[-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0
; asm: addl $0x7fffffff, %r10d
[-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
[-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
; asm: addl $100, %r8d
[-,%r8] v73 = iadd_imm v4, 100 ; bin: 41 83 c0 64
[-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64
; asm: addl $-100, %r14d
[-,%r14] v74 = iadd_imm v5, -100 ; bin: 41 83 c6 9c
[-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c
; asm: andl $-100000, %ecx
[-,%rcx] v80 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960
[-,%rcx] v110 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960
; asm: andl $100000, %esi
[-,%rsi] v81 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0
[-,%rsi] v111 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0
; asm: andl $0x7fffffff, %r10d
[-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
[-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
; asm: andl $100, %r8d
[-,%r8] v83 = band_imm v4, 100 ; bin: 41 83 e0 64
[-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64
; asm: andl $-100, %r14d
[-,%r14] v84 = band_imm v5, -100 ; bin: 41 83 e6 9c
[-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c
; asm: orl $-100000, %ecx
[-,%rcx] v90 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960
[-,%rcx] v120 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960
; asm: orl $100000, %esi
[-,%rsi] v91 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0
[-,%rsi] v121 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0
; asm: orl $0x7fffffff, %r10d
[-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff
[-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff
; asm: orl $100, %r8d
[-,%r8] v93 = bor_imm v4, 100 ; bin: 41 83 c8 64
[-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64
; asm: orl $-100, %r14d
[-,%r14] v94 = bor_imm v5, -100 ; bin: 41 83 ce 9c
[-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c
; asm: ret
; asm: xorl $-100000, %ecx
[-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960
[-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960
; asm: xorl $100000, %esi
[-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0
[-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0
; asm: xorl $0x7fffffff, %r10d
[-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
[-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
; asm: xorl $100, %r8d
[-,%r8] v103 = bxor_imm v4, 100 ; bin: 41 83 f0 64
[-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64
; asm: xorl $-100, %r14d
[-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c
[-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c
; Register copies.
; asm: movl %esi, %ecx
[-,%rcx] v110 = copy v2 ; bin: 40 89 f1
[-,%rcx] v140 = copy v2 ; bin: 40 89 f1
; asm: movl %r10d, %esi
[-,%rsi] v111 = copy v3 ; bin: 44 89 d6
[-,%rsi] v141 = copy v3 ; bin: 44 89 d6
; asm: movl %ecx, %r10d
[-,%r10] v112 = copy v1 ; bin: 41 89 ca
[-,%r10] v142 = copy v1 ; bin: 41 89 ca
; More arithmetic.
; asm: imull %esi, %ecx
[-,%rcx] v120 = imul v1, v2 ; bin: 40 0f af ce
[-,%rcx] v150 = imul v1, v2 ; bin: 40 0f af ce
; asm: imull %r10d, %esi
[-,%rsi] v121 = imul v2, v3 ; bin: 41 0f af f2
[-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2
; asm: imull %ecx, %r10d
[-,%r10] v122 = imul v3, v1 ; bin: 44 0f af d1
[-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1
[-,%rax] v130 = iconst.i32 1
[-,%rdx] v131 = iconst.i32 2
[-,%rax] v160 = iconst.i32 1
[-,%rdx] v161 = iconst.i32 2
; asm: idivl %ecx
[-,%rax,%rdx] v132, v133 = x86_sdivmodx v130, v131, v1 ; bin: 40 f7 f9
[-,%rax,%rdx] v162, v163 = x86_sdivmodx v130, v131, v1 ; bin: 40 f7 f9
; asm: idivl %esi
[-,%rax,%rdx] v134, v135 = x86_sdivmodx v130, v131, v2 ; bin: 40 f7 fe
[-,%rax,%rdx] v164, v165 = x86_sdivmodx v130, v131, v2 ; bin: 40 f7 fe
; asm: idivl %r10d
[-,%rax,%rdx] v136, v137 = x86_sdivmodx v130, v131, v3 ; bin: 41 f7 fa
[-,%rax,%rdx] v166, v167 = x86_sdivmodx v130, v131, v3 ; bin: 41 f7 fa
; asm: divl %ecx
[-,%rax,%rdx] v138, v139 = x86_udivmodx v130, v131, v1 ; bin: 40 f7 f1
[-,%rax,%rdx] v168, v169 = x86_udivmodx v130, v131, v1 ; bin: 40 f7 f1
; asm: divl %esi
[-,%rax,%rdx] v140, v141 = x86_udivmodx v130, v131, v2 ; bin: 40 f7 f6
[-,%rax,%rdx] v170, v171 = x86_udivmodx v130, v131, v2 ; bin: 40 f7 f6
; asm: divl %r10d
[-,%rax,%rdx] v142, v143 = x86_udivmodx v130, v131, v3 ; bin: 41 f7 f2
[-,%rax,%rdx] v172, v173 = x86_udivmodx v130, v131, v3 ; bin: 41 f7 f2
; Bit-counting instructions.

View File

@@ -271,7 +271,7 @@ istore16 = Instruction(
'istore16', r"""
Store the low 16 bits of ``x`` to memory at ``p + Offset``.
This is equivalent to ``ireduce.i16`` followed by ``store.i8``.
This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
""",
ins=(Flags, x, p, Offset), can_store=True)
@@ -301,7 +301,7 @@ istore32 = Instruction(
'istore32', r"""
Store the low 32 bits of ``x`` to memory at ``p + Offset``.
This is equivalent to ``ireduce.i32`` followed by ``store.i8``.
This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
""",
ins=(Flags, x, p, Offset), can_store=True)

View File

@@ -55,6 +55,28 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
I64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
    """
    Register the load/store encodings of `inst` on both I32 and I64.

    - `inst.i32` is added to I32 using the plain recipe.
    - `inst.i32` is added to I64 twice: with a REX prefix and without one.
    - `inst.i64` is added to I64 with a REX prefix. The `w_bit` argument
      determines whether or not the REX.W bit is set: when it is true, only
      the REX encoding with `w=1` is added; when it is false, REX.W is left
      unset and a REX-less encoding is added as well.
    """
    I32.enc(inst.i32.any, *recipe(*args, **kwargs))

    # The order of this tuple matters: the REX form has to be registered
    # before the REX-less form so that the REX-less form is not picked by
    # default. Otherwise reg-alloc would never use r8 and up.
    for make_recipe in (recipe.rex, recipe):
        I64.enc(inst.i32.any, *make_recipe(*args, **kwargs))

    if w_bit:
        I64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
        return

    # Without REX.W the prefix is optional, so emit both forms, REX first.
    for make_recipe in (recipe.rex, recipe):
        I64.enc(inst.i64.any, *make_recipe(*args, **kwargs))
def enc_flt(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
@@ -142,38 +164,60 @@ I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
I32.enc(base.store.i32.any, *r.st(0x89))
I32.enc(base.store.i32.any, *r.stDisp8(0x89))
I32.enc(base.store.i32.any, *r.stDisp32(0x89))
#
enc_i32_i64_ld_st(base.store, True, r.st, 0x89)
enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89)
enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89)
I32.enc(base.istore16.i32.any, *r.st(0x66, 0x89))
I32.enc(base.istore16.i32.any, *r.stDisp8(0x66, 0x89))
I32.enc(base.istore16.i32.any, *r.stDisp32(0x66, 0x89))
I64.enc(base.istore32.i64.any, *r.st.rex(0x89))
I64.enc(base.istore32.i64.any, *r.stDisp8.rex(0x89))
I64.enc(base.istore32.i64.any, *r.stDisp32.rex(0x89))
enc_i32_i64_ld_st(base.istore16, False, r.st, 0x66, 0x89)
enc_i32_i64_ld_st(base.istore16, False, r.stDisp8, 0x66, 0x89)
enc_i32_i64_ld_st(base.istore16, False, r.stDisp32, 0x66, 0x89)
# Byte stores are more complicated because the registers they can address
# depend on the presence of a REX prefix.
I32.enc(base.istore8.i32.any, *r.st_abcd(0x88))
I64.enc(base.istore8.i32.any, *r.st_abcd(0x88))
I64.enc(base.istore8.i64.any, *r.st.rex(0x88))
I32.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
I64.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
I64.enc(base.istore8.i64.any, *r.stDisp8.rex(0x88))
I32.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
I64.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
I64.enc(base.istore8.i64.any, *r.stDisp32.rex(0x88))
I32.enc(base.load.i32.any, *r.ld(0x8b))
I32.enc(base.load.i32.any, *r.ldDisp8(0x8b))
I32.enc(base.load.i32.any, *r.ldDisp32(0x8b))
enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b)
enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b)
enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b)
I32.enc(base.uload16.i32.any, *r.ld(0x0f, 0xb7))
I32.enc(base.uload16.i32.any, *r.ldDisp8(0x0f, 0xb7))
I32.enc(base.uload16.i32.any, *r.ldDisp32(0x0f, 0xb7))
I64.enc(base.uload32.i64, *r.ld.rex(0x8b))
I64.enc(base.uload32.i64, *r.ldDisp8.rex(0x8b))
I64.enc(base.uload32.i64, *r.ldDisp32.rex(0x8b))
I32.enc(base.sload16.i32.any, *r.ld(0x0f, 0xbf))
I32.enc(base.sload16.i32.any, *r.ldDisp8(0x0f, 0xbf))
I32.enc(base.sload16.i32.any, *r.ldDisp32(0x0f, 0xbf))
I64.enc(base.sload32.i64, *r.ld.rex(0x63, w=1))
I64.enc(base.sload32.i64, *r.ldDisp8.rex(0x63, w=1))
I64.enc(base.sload32.i64, *r.ldDisp32.rex(0x63, w=1))
I32.enc(base.uload8.i32.any, *r.ld(0x0f, 0xb6))
I32.enc(base.uload8.i32.any, *r.ldDisp8(0x0f, 0xb6))
I32.enc(base.uload8.i32.any, *r.ldDisp32(0x0f, 0xb6))
enc_i32_i64_ld_st(base.uload16, True, r.ld, 0x0f, 0xb7)
enc_i32_i64_ld_st(base.uload16, True, r.ldDisp8, 0x0f, 0xb7)
enc_i32_i64_ld_st(base.uload16, True, r.ldDisp32, 0x0f, 0xb7)
I32.enc(base.sload8.i32.any, *r.ld(0x0f, 0xbe))
I32.enc(base.sload8.i32.any, *r.ldDisp8(0x0f, 0xbe))
I32.enc(base.sload8.i32.any, *r.ldDisp32(0x0f, 0xbe))
enc_i32_i64_ld_st(base.sload16, True, r.ld, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.sload16, True, r.ldDisp8, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.sload16, True, r.ldDisp32, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.uload8, True, r.ld, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.uload8, True, r.ldDisp8, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.uload8, True, r.ldDisp32, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe)
enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe)
enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe)
#
# Call/return

View File

@@ -114,6 +114,15 @@ fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
sink.put1(bits as u8);
}
/// Emit a single-byte opcode with a mandatory prefix and REX.
///
/// Bits 8-9 of `bits` select the mandatory prefix byte: the selector minus one
/// is used as the index into `PREFIX`, so a selector of 0 has no table entry.
/// Bits 10-11 must be clear (checked in debug builds). The low byte of `bits`
/// is the opcode itself, written last.
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
    let prefix_index = ((bits >> 8) & 3) - 1;
    let opcode = bits as u8;
    // The mandatory prefix is written first, before the `rex_prefix` call.
    sink.put1(PREFIX[prefix_index as usize]);
    rex_prefix(bits, rex, sink);
    sink.put1(opcode);
}
/// Emit a ModR/M byte for reg-reg operands.
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;