diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index e777cfb359..c1a7c7c935 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -1,5 +1,6 @@ ; binary emission of 32-bit code. test binemit +set is_compressed isa intel haswell ; The binary encodings can be verified with the command: @@ -393,3 +394,30 @@ ebb1: ebb2: trap user0 ; bin: 0f 0b } + +; Special branch encodings only for I32 mode. +function %special_branches() { +ebb0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%rdi] v3 = icmp eq v1, v2 + [-,%rbx] v4 = icmp ugt v1, v2 + + ; asm: testl $0xff, %edi + ; asm: je ebb1 + brz v3, ebb1 ; bin: f7 c7 000000ff 0f 84 00000015 + ; asm: testb %bl, %bl + ; asm: je ebb1 + brz v4, ebb1 ; bin: 84 db 74 11 + ; asm: testl $0xff, %edi + ; asm: jne ebb1 + brnz v3, ebb1 ; bin: f7 c7 000000ff 0f 85 00000005 + ; asm: testb %bl, %bl + ; asm: jne ebb1 + brnz v4, ebb1 ; bin: 84 db 75 01 + + return + +ebb1: + return +} diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 10d89a33b4..6c1de71d87 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -292,6 +292,12 @@ enc_i32_i64(base.brnz, r.tjccd, 0x85) # Branch on a b1 value in a register only looks at the low 8 bits. See also # bint encodings below. +# +# Start with the worst-case encoding for I32 only. The register allocator can't +# handle a branch with an ABCD-constrained operand. 
+I32.enc(base.brz.b1, *r.t8jccd_long(0x84)) +I32.enc(base.brnz.b1, *r.t8jccd_long(0x85)) + enc_both(base.brz.b1, r.t8jccb_abcd, 0x74) enc_both(base.brz.b1, r.t8jccd_abcd, 0x84) enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index cd59cc1bbb..e65f0a8217 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -848,6 +848,25 @@ t8jccd_abcd = TailRecipe( disp4(destination, func, sink); ''') +# Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode. +# The register allocator can't handle a branch instruction with constrained +# operands like the t8jccd_abcd above. This variant can accept the b1 operand in +# any register, but is larger because it uses a 32-bit test instruction with +# a 0xff immediate. +t8jccd_long = TailRecipe( + 't8jccd_long', Branch, size=5 + 6, ins=GPR, outs=(), + branch_range=32, + emit=''' + // test32 r, 0xff. + PUT_OP((bits & 0xff00) | 0xf7, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.put4(0xff); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + ''') + # Comparison that produces a `b1` result in a GPR. # # This is a macro of a `cmp` instruction followed by a `setCC` instruction.