x64: Clarify and shrink up ModRM/SIB encoding (#6181)

I noticed recently that for the `ImmRegRegShift` addressing mode
Cranelift unconditionally emits at least a 1-byte immediate for the
offset added to the base-plus-scaled-index computation, even when that
offset is zero. In that case the instruction encoding can be made one
byte more compact by dropping the displacement entirely. This commit
started off by applying that optimization, which is what produced the
`*.clif` test changes below.

While reading this code further, however, I found it quite hard to
follow what was happening across the various branches and ModRM/SIB
bit manipulations. I reviewed these encodings in the x64 architecture
manual and attempted to improve the encoding logic here. The new
version in this commit is intended to be functionally equivalent to
the prior one; dropping the zero offset from the `ImmRegRegShift`
variant is the only behavioral change.
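
As a refresher while reviewing (an illustrative sketch with made-up helper
names, not the code this commit touches), the two bytes being assembled are
just packed bit fields:

```rust
/// ModRM: `mod` (bits 7:6) selects the displacement size (0b00 = none, with
/// exceptions around rbp/r13; 0b01 = disp8; 0b10 = disp32; 0b11 = register
/// operand), `reg` (5:3) and `rm` (2:0) name registers, and `rm = 0b100`
/// means a SIB byte follows.
fn encode_modrm(mod_bits: u8, reg: u8, rm: u8) -> u8 {
    debug_assert!(mod_bits < 4 && reg < 8 && rm < 8);
    (mod_bits << 6) | (reg << 3) | rm
}

/// SIB: `scale` (7:6) is log2 of the index multiplier, `index` (5:3) and
/// `base` (2:0) hold the low three bits of those registers (the high bits
/// live in the REX prefix).
fn encode_sib(scale: u8, index: u8, base: u8) -> u8 {
    debug_assert!(scale < 4 && index < 8 && base < 8);
    (scale << 6) | (index << 3) | base
}

fn main() {
    // `(%rax, %rcx, 4)` with a zero offset: mod=0b00, rm=0b100 (SIB follows),
    // then scale=0b10 (*4), index=rcx (1), base=rax (0) -- and no
    // displacement byte at all.
    let modrm = encode_modrm(0b00, 0, 0b100);
    let sib = encode_sib(0b10, 1, 0);
    assert_eq!((modrm, sib), (0x04, 0x88));
}
```

The offset shifts in the `jmp`/`leaq` expectations below all fall out of that
one dropped displacement byte.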

Alex Crichton authored on 2023-04-10 14:37:19 -05:00; committed by GitHub
parent 8f1a7773a3, commit 435b6894d7
4 changed files with 119 additions and 79 deletions


@@ -343,7 +343,7 @@ block2:
; movl %edi, %r10d
; cmpl %r9d, %r10d
; cmovbl %r10d, %r9d
-; leaq 0xa(%rip), %rax
+; leaq 9(%rip), %rax
; movslq (%rax, %r9, 4), %rcx
; addq %rcx, %rax
; jmpq *%rax
@@ -353,14 +353,14 @@ block2:
; addb %al, (%rax)
; sbbb %al, (%rax)
; addb %al, (%rax)
-; block2: ; offset 0x31
-; jmp 0x3d
-; block3: ; offset 0x36
+; block2: ; offset 0x30
+; jmp 0x3c
+; block3: ; offset 0x35
; xorl %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
-; block4: ; offset 0x3d
+; block4: ; offset 0x3c
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
@@ -938,7 +938,7 @@ block5(v5: i32):
; movl %edi, %ecx
; cmpl %eax, %ecx
; cmovbl %ecx, %eax
-; leaq 0xb(%rip), %r9
+; leaq 0xa(%rip), %r9
; movslq (%r9, %rax, 4), %r10
; addq %r10, %r9
; jmpq *%r9
@@ -950,20 +950,20 @@ block5(v5: i32):
; addb %al, (%rax)
; addb %dh, (%rdi)
; addb %al, (%rax)
-; block2: ; offset 0x36
-; jmp 0x45
-; block3: ; offset 0x3b
+; block2: ; offset 0x35
+; jmp 0x44
+; block3: ; offset 0x3a
; movl $3, %esi
-; jmp 0x5e
-; block4: ; offset 0x45
+; jmp 0x5d
+; block4: ; offset 0x44
; movl $2, %esi
-; jmp 0x5e
-; block5: ; offset 0x4f
+; jmp 0x5d
+; block5: ; offset 0x4e
; movl $1, %esi
-; jmp 0x5e
-; block6: ; offset 0x59
+; jmp 0x5d
+; block6: ; offset 0x58
; movl $4, %esi
-; block7: ; offset 0x5e
+; block7: ; offset 0x5d
; leal (%rdi, %rsi), %eax
; movq %rbp, %rsp
; popq %rbp
@@ -1026,7 +1026,7 @@ block1(v5: i32):
; movl %edi, %r9d
; cmpl %r8d, %r9d
; cmovbl %r9d, %r8d
-; leaq 0xa(%rip), %rdi
+; leaq 9(%rip), %rdi
; movslq (%rdi, %r8, 4), %rcx
; addq %rcx, %rdi
; jmpq *%rdi
@@ -1040,20 +1040,20 @@ block1(v5: i32):
; addb %al, (%rax)
; xorb $0, %al
; addb %al, (%rax)
-; block2: ; offset 0x4f
-; jmp 0x6f
-; block3: ; offset 0x54
+; block2: ; offset 0x4e
+; jmp 0x6e
+; block3: ; offset 0x53
; movq %r10, %rax
-; jmp 0x6f
-; block4: ; offset 0x5c
+; jmp 0x6e
+; block4: ; offset 0x5b
; movq %r11, %rax
-; jmp 0x6f
-; block5: ; offset 0x64
+; jmp 0x6e
+; block5: ; offset 0x63
; movq %r11, %rax
-; jmp 0x6f
-; block6: ; offset 0x6c
+; jmp 0x6e
+; block6: ; offset 0x6b
; movq %rsi, %rax
-; block7: ; offset 0x6f
+; block7: ; offset 0x6e
; movq %rbp, %rsp
; popq %rbp
; retq


@@ -1415,6 +1415,7 @@ block0(v0: i8x16, v1: i32):
; movq %rbp, %rsp
; popq %rbp
; retq
+; addb %bh, %bh
function %i8x16_shl_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
@@ -1658,7 +1659,7 @@ block0(v0: i8x16, v1: i32):
; retq
; addb %al, (%rax)
; addb %al, (%rax)
-; addb %bh, %bh
+; addb %al, (%rax)
function %i8x16_ushr_imm(i8x16) -> i8x16 {
block0(v0: i8x16):


@@ -365,6 +365,7 @@ block0(v0: i32):
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
+; addb %bh, %bh
function %ishl_i8x16_imm(i8x16) -> i8x16 {
block0(v0: i8x16):