Add a t8jccd_long encoding recipe for brz.b1 and brnz.b1 in 32-bit mode.

The register allocator can't handle branches with constrained register
operands, and the brz.b1/brnz.b1 instructions only have the t8jccd_abcd
recipe in 32-bit mode, where no REX prefixes are possible.

This adds a worst-case encoding for those cases where a b1 value lives
in a non-ABCD register.
This commit is contained in:
Jakob Stoklund Olesen
2017-10-11 14:18:21 -07:00
parent ece09f2df2
commit ba52a38597
3 changed files with 53 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
; binary emission of 32-bit code. ; binary emission of 32-bit code.
test binemit test binemit
set is_compressed
isa intel haswell isa intel haswell
; The binary encodings can be verified with the command: ; The binary encodings can be verified with the command:
@@ -393,3 +394,30 @@ ebb1:
ebb2: ebb2:
trap user0 ; bin: 0f 0b trap user0 ; bin: 0f 0b
} }
; Special branch encodings only for I32 mode.
;
; v3 is placed in %rdi and v4 in %rbx, so each brz/brnz pair below exercises
; both forms: the long `testl $0xff` + 32-bit-displacement jump for v3, and the
; short `testb` + 8-bit-displacement jump for v4.
function %special_branches() {
ebb0:
[-,%rcx] v1 = iconst.i32 1
[-,%rsi] v2 = iconst.i32 2
[-,%rdi] v3 = icmp eq v1, v2
[-,%rbx] v4 = icmp ugt v1, v2
; asm: testl $0xff, %edi
; asm: je ebb1
brz v3, ebb1 ; bin: f7 c7 000000ff 0f 84 00000015
; asm: testb %bl, %bl
; asm: je ebb1
brz v4, ebb1 ; bin: 84 db 74 11
; asm: testl $0xff, %edi
; asm: jne ebb1
brnz v3, ebb1 ; bin: f7 c7 000000ff 0f 85 00000005
; asm: testb %bl, %bl
; asm: jne ebb1
brnz v4, ebb1 ; bin: 84 db 75 01
return
ebb1:
return
}

View File

@@ -292,6 +292,12 @@ enc_i32_i64(base.brnz, r.tjccd, 0x85)
# Branch on a b1 value in a register only looks at the low 8 bits. See also # Branch on a b1 value in a register only looks at the low 8 bits. See also
# bint encodings below. # bint encodings below.
#
# Start with the worst-case encoding for I32 only. The register allocator can't
# handle a branch with an ABCD-constrained operand.
#
# The 0x84 / 0x85 arguments end up as the byte following 0x0f in the emitted
# conditional jump (the binemit filetest expects `0f 84` for brz and `0f 85`
# for brnz).
I32.enc(base.brz.b1, *r.t8jccd_long(0x84))
I32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
enc_both(base.brz.b1, r.t8jccb_abcd, 0x74) enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
enc_both(base.brz.b1, r.t8jccd_abcd, 0x84) enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75) enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75)

View File

@@ -848,6 +848,25 @@ t8jccd_abcd = TailRecipe(
disp4(destination, func, sink); disp4(destination, func, sink);
''') ''')
# Worst-case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
# The register allocator can't handle a branch instruction with constrained
# operands like the t8jccd_abcd above. This variant can accept the b1 operand
# in any register, but it is larger because it uses a 32-bit test instruction
# with a 0xff immediate.
#
# Sequence written by the `emit` template below:
#   1. An opcode via PUT_OP, where the low byte of `bits` is replaced by 0xf7
#      and the bits under the 0xff00 mask are kept unchanged.
#   2. A ModR/M byte for the input register, then the 4-byte immediate 0xff.
#   3. 0x0f followed by the low byte of `bits` (0x84 / 0x85 for the brz / brnz
#      encodings that use this recipe), then a 4-byte branch displacement.
#
# NOTE(review): size=5 + 6 looks like ModR/M + imm32 for the test plus the
# 6-byte Jcc, not counting what PUT_OP writes -- confirm against TailRecipe's
# size convention.
t8jccd_long = TailRecipe(
't8jccd_long', Branch, size=5 + 6, ins=GPR, outs=(),
branch_range=32,
emit='''
// test32 r, 0xff.
PUT_OP((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
modrm_r_bits(in_reg0, bits, sink);
sink.put4(0xff);
// Jcc instruction.
sink.put1(0x0f);
sink.put1(bits as u8);
disp4(destination, func, sink);
''')
# Comparison that produces a `b1` result in a GPR. # Comparison that produces a `b1` result in a GPR.
# #
# This is a macro of a `cmp` instruction followed by a `setCC` instruction. # This is a macro of a `cmp` instruction followed by a `setCC` instruction.