From 82fbc78f2fb95f7854ef26cad1c469ff0deafade Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 19 Jul 2017 10:47:51 -0700 Subject: [PATCH] Add Intel encodings for the icmp instruction. This instruction returns a `b1` value which is represented as the output of a setCC instruction which is the low 8 bits of a GPR register. Use a cmp+setCC macro recipe to encode this. That is not ideal, but we can't represent CPU flags yet. --- filetests/isa/intel/binary32.cton | 72 +++++++++ filetests/isa/intel/binary64.cton | 184 ++++++++++++++++++++--- lib/cretonne/meta/base/legalize.py | 13 +- lib/cretonne/meta/isa/intel/encodings.py | 8 + lib/cretonne/meta/isa/intel/recipes.py | 42 ++++++ 5 files changed, 297 insertions(+), 22 deletions(-) diff --git a/filetests/isa/intel/binary32.cton b/filetests/isa/intel/binary32.cton index 7b43ed7aa2..2ee9396bf0 100644 --- a/filetests/isa/intel/binary32.cton +++ b/filetests/isa/intel/binary32.cton @@ -254,6 +254,78 @@ ebb0: ; asm: tzcntl %ecx, %esi [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 + ; Integer comparisons. + + ; asm: cmpl %esi, %ecx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 + ; asm: cmpl %ecx, %esi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v1 ; bin: 39 ce 0f 94 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 + ; asm: cmpl %ecx, %esi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v1 ; bin: 39 ce 0f 95 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 + ; asm: cmpl %ecx, %esi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v1 ; bin: 39 ce 0f 9c c2 + + ; asm: cmpl %esi, %ecx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 + ; asm: cmpl %ecx, %esi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v1 ; bin: 39 ce 0f 9d c2 + + ; asm: cmpl %esi, %ecx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 + ; asm: cmpl %ecx, %esi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v1 ; bin: 39 ce 0f 9f c2 + + ; asm: cmpl %esi, %ecx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 + ; asm: cmpl %ecx, %esi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v1 ; bin: 39 ce 0f 9e c2 + + ; asm: cmpl %esi, %ecx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 + ; asm: cmpl %ecx, %esi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v1 ; bin: 39 ce 0f 92 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 + ; asm: cmpl %ecx, %esi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v1 ; bin: 39 ce 0f 93 c2 + + ; asm: cmpl %esi, %ecx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 + ; asm: cmpl %ecx, %esi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v1 ; bin: 39 ce 0f 97 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 + ; asm: cmpl %ecx, %esi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v1 ; bin: 39 ce 0f 96 c2 + ; asm: call foo call fn0() ; bin: e8 PCRel4(fn0) 00000000 diff --git a/filetests/isa/intel/binary64.cton b/filetests/isa/intel/binary64.cton index 1d88b69d50..00524afc4e 100644 --- a/filetests/isa/intel/binary64.cton +++ b/filetests/isa/intel/binary64.cton @@ -192,22 +192,94 @@ ebb0: ; asm: tzcntq %rcx, %r10 [-,%r10] v208 = ctz v1 ; bin: f3 4c 0f bc d1 - ; asm: testq %rcx, %ecx + ; Integer comparisons. + + ; asm: cmpq %rsi, %rcx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 48 39 f1 0f 94 c3 + ; asm: cmpq %r10, %rsi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v3 ; bin: 4c 39 d6 0f 94 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 48 39 f1 0f 95 c3 + ; asm: cmpq %r10, %rsi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v3 ; bin: 4c 39 d6 0f 95 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 48 39 f1 0f 9c c3 + ; asm: cmpq %r10, %rsi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v3 ; bin: 4c 39 d6 0f 9c c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 48 39 f1 0f 9d c3 + ; asm: cmpq %r10, %rsi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v3 ; bin: 4c 39 d6 0f 9d c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 48 39 f1 0f 9f c3 + ; asm: cmpq %r10, %rsi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 4c 39 d6 0f 9f c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 48 39 f1 0f 9e c3 + ; asm: cmpq %r10, %rsi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v3 ; bin: 4c 39 d6 0f 9e c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 48 39 f1 0f 92 c3 + ; asm: cmpq %r10, %rsi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v3 ; bin: 4c 39 d6 0f 92 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 48 39 f1 0f 93 c3 + ; asm: cmpq %r10, %rsi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v3 ; bin: 4c 39 d6 0f 93 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 48 39 f1 0f 97 c3 + ; asm: cmpq %r10, %rsi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 4c 39 d6 0f 97 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 48 39 f1 0f 96 c3 + ; asm: cmpq %r10, %rsi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2 + + ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b - ; asm: testq %rsi, %esi + ; asm: testq %rsi, %rsi ; asm: je ebb1 brz v2, ebb1 ; bin: 48 85 f6 74 16 - ; asm: testq %r10, %r10d + ; asm: testq %r10, %r10 ; asm: je ebb1 brz v3, ebb1 ; bin: 4d 85 d2 74 11 - ; asm: testq %rcx, %ecx + ; asm: testq %rcx, %rcx ; asm: jne ebb1 brnz v1, ebb1 ; bin: 48 85 c9 75 0c - ; asm: test %rsi, %esi + ; asm: testq %rsi, %rsi ; asm: jne ebb1 brnz v2, ebb1 ; bin: 48 85 f6 75 07 - ; asm: testq %r10, %r10d + ; asm: testq %r10, %r10 ; asm: jne ebb1 brnz v3, ebb1 ; bin: 4d 85 d2 75 02 @@ -376,15 +448,15 @@ ebb0: [-,%rax] v130 = iconst.i32 1 [-,%rdx] v131 = iconst.i32 2 - ; asm: idivl %rcx + ; asm: idivl %ecx [-,%rax,%rdx] v132, v133 = x86_sdivmodx v130, v131, v1 ; bin: 40 f7 f9 - ; asm: idivl %rsi + ; asm: idivl %esi [-,%rax,%rdx] v134, v135 = x86_sdivmodx v130, v131, v2 ; bin: 40 f7 fe ; asm: idivl %r10d [-,%rax,%rdx] v136, v137 = x86_sdivmodx v130, v131, v3 ; bin: 41 f7 fa - ; asm: divl %rcx + ; asm: divl %ecx [-,%rax,%rdx] v138, v139 = x86_udivmodx v130, v131, v1 ; bin: 40 f7 f1 - ; asm: divl %rsi + ; asm: divl %esi [-,%rax,%rdx] v140, v141 = x86_udivmodx v130, v131, v2 ; bin: 40 f7 f6 ; asm: divl %r10d [-,%rax,%rdx] v142, v143 = x86_udivmodx v130, v131, v3 ; bin: 41 f7 f2 @@ -412,33 +484,105 @@ ebb0: ; asm: tzcntl %ecx, %r10d [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 + ; Integer comparisons. + + ; asm: cmpl %esi, %ecx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 40 39 f1 0f 94 c3 + ; asm: cmpl %r10d, %esi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v3 ; bin: 44 39 d6 0f 94 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 40 39 f1 0f 95 c3 + ; asm: cmpl %r10d, %esi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v3 ; bin: 44 39 d6 0f 95 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 40 39 f1 0f 9c c3 + ; asm: cmpl %r10d, %esi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v3 ; bin: 44 39 d6 0f 9c c2 + + ; asm: cmpl %esi, %ecx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 40 39 f1 0f 9d c3 + ; asm: cmpl %r10d, %esi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v3 ; bin: 44 39 d6 0f 9d c2 + + ; asm: cmpl %esi, %ecx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 40 39 f1 0f 9f c3 + ; asm: cmpl %r10d, %esi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 44 39 d6 0f 9f c2 + + ; asm: cmpl %esi, %ecx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 40 39 f1 0f 9e c3 + ; asm: cmpl %r10d, %esi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v3 ; bin: 44 39 d6 0f 9e c2 + + ; asm: cmpl %esi, %ecx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 40 39 f1 0f 92 c3 + ; asm: cmpl %r10d, %esi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v3 ; bin: 44 39 d6 0f 92 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 40 39 f1 0f 93 c3 + ; asm: cmpl %r10d, %esi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v3 ; bin: 44 39 d6 0f 93 c2 + + ; asm: cmpl %esi, %ecx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 40 39 f1 0f 97 c3 + ; asm: cmpl %r10d, %esi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 44 39 d6 0f 97 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 40 39 f1 0f 96 c3 + ; asm: cmpl %r10d, %esi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2 + ; asm: testl %ecx, %ecx - ; asm: je ebb1 + ; asm: je ebb1x brz v1, ebb1 ; bin: 40 85 c9 74 1b ; asm: testl %esi, %esi - ; asm: je ebb1 + ; asm: je ebb1x brz v2, ebb1 ; bin: 40 85 f6 74 16 ; asm: testl %r10d, %r10d - ; asm: je ebb1 + ; asm: je ebb1x brz v3, ebb1 ; bin: 45 85 d2 74 11 ; asm: testl %ecx, %ecx - ; asm: jne ebb1 + ; asm: jne ebb1x brnz v1, ebb1 ; bin: 40 85 c9 75 0c - ; asm: test %esi, %esi - ; asm: jne ebb1 + ; asm: testl %esi, %esi + ; asm: jne ebb1x brnz v2, ebb1 ; bin: 40 85 f6 75 07 ; asm: testl %r10d, %r10d - ; asm: jne ebb1 + ; asm: jne ebb1x brnz v3, ebb1 ; bin: 45 85 d2 75 02 - ; asm: jmp ebb2 + ; asm: jmp ebb2x jump ebb2 ; bin: eb 01 - ; asm: ebb1: + ; asm: ebb1x: ebb1: return ; bin: c3 - ; asm: ebb2: + ; asm: ebb2x: ebb2: jump ebb1 ; bin: eb fd } diff --git a/lib/cretonne/meta/base/legalize.py b/lib/cretonne/meta/base/legalize.py index 8211a0039f..058c87f062 100644 --- a/lib/cretonne/meta/base/legalize.py +++ b/lib/cretonne/meta/base/legalize.py @@ -11,7 +11,8 @@ from .immediates import intcc from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm from .instructions import isub, isub_bin, isub_bout, isub_borrow from .instructions import band, bor, bxor, isplit, iconcat -from .instructions import icmp, iconst, bint +from .instructions import icmp, icmp_imm +from .instructions import iconst, bint from cdsl.ast import Var from cdsl.xform import Rtl, XFormGroup @@ -53,6 +54,7 @@ yl = Var('yl') yh = Var('yh') al = Var('al') ah = Var('ah') +cc = Var('cc') narrow.legalize( a << iadd(x, y), @@ -135,10 +137,17 @@ expand.legalize( b << bor(b1, b2) )) -# Expansions for immediates that are too large. +# Expansions for immediate operands that are out of range. expand.legalize( a << iadd_imm(x, y), Rtl( a1 << iconst(y), a << iadd(x, a1) )) + +expand.legalize( + a << icmp_imm(cc, x, y), + Rtl( + a1 << iconst(y), + a << icmp(cc, x, a1) + )) diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index f5e8aabd50..9af8d6484d 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -174,3 +174,11 @@ I32.enc(base.brnz.i32, *r.tjccb(0x75)) I64.enc(base.brnz.i64, *r.tjccb.rex(0x75, w=1)) I64.enc(base.brnz.i32, *r.tjccb.rex(0x75)) I64.enc(base.brnz.i32, *r.tjccb(0x75)) + +# +# Comparisons +# +I32.enc(base.icmp.i32, *r.icscc(0x39)) +I64.enc(base.icmp.i64, *r.icscc.rex(0x39, w=1)) +I64.enc(base.icmp.i32, *r.icscc.rex(0x39)) +I64.enc(base.icmp.i32, *r.icscc(0x39)) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index f68a8344b3..7fa7ca1bf3 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -6,6 +6,7 @@ from cdsl.isa import EncRecipe from cdsl.predicates import IsSignedInt, IsEqual from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry from base.formats import Call, IndirectCall, Store, Load +from base.formats import IntCompare from base.formats import RegMove, Ternary, Jump, Branch from .registers import GPR, ABCD @@ -464,3 +465,44 @@ tjccb = TailRecipe( sink.put1(bits as u8); disp1(destination, func, sink); ''') + +# Comparison that produces a `b1` result in a GPR. +# +# This is a macro of a `cmp` instruction followed by a `setCC` instruction. +# This is not a great solution because: +# +# - The cmp+setcc combination is not recognized by CPU's macro fusion. +# - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC` +# instructions may need a REX independently. +# - Modeling CPU flags in the type system would be better. +# +# Since the `setCC` instructions only write an 8-bit register, we use that as +# our `b1` representation: A `b1` value is represented as a GPR where the low 8 +# bits are known to be 0 or 1. The high bits are undefined. +# +# This bandaid macro doesn't support a REX prefix for the final `setCC` +# instruction, so it is limited to the `ABCD` register class for booleans. +icscc = TailRecipe( + 'cscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD, + emit=''' + // Comparison instruction. + PUT_OP(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + // `setCC` instruction, no REX. + use ir::condcodes::IntCC::*; + let setcc = match cond { + Equal => 0x94, + NotEqual => 0x95, + SignedLessThan => 0x9c, + SignedGreaterThanOrEqual => 0x9d, + SignedGreaterThan => 0x9f, + SignedLessThanOrEqual => 0x9e, + UnsignedLessThan => 0x92, + UnsignedGreaterThanOrEqual => 0x93, + UnsignedGreaterThan => 0x97, + UnsignedLessThanOrEqual => 0x96, + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + ''')