Add encodings for CPU flags instructions.

Branch on flags: brif, brff
Compare integers to flags: ifcmp
Compare floats to flags: ffcmp
Convert flags to b1: trueif, trueff
This commit is contained in:
Jakob Stoklund Olesen
2017-10-13 16:44:34 -07:00
parent 0f4f663584
commit 5d065c4d8f
7 changed files with 430 additions and 9 deletions

View File

@@ -220,6 +220,13 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2
; asm: ucomiss %xmm2, %xmm5
[-,%eflags] v310 = ffcmp v10, v11 ; bin: 0f 2e ea
; asm: ucomiss %xmm5, %xmm2
[-,%eflags] v311 = ffcmp v11, v10 ; bin: 0f 2e d5
; asm: ucomiss %xmm5, %xmm5
[-,%eflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
return
}
@@ -428,5 +435,57 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2
; asm: ucomisd %xmm2, %xmm5
[-,%eflags] v310 = ffcmp v10, v11 ; bin: 66 0f 2e ea
; asm: ucomisd %xmm5, %xmm2
[-,%eflags] v311 = ffcmp v11, v10 ; bin: 66 0f 2e d5
; asm: ucomisd %xmm5, %xmm5
[-,%eflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed
return
}
; Encoding test for the floating point CPU flags instructions:
; `ffcmp` writes %eflags, `brff` branches on them, and `trueff` turns them
; into a b1 value. One `brff` and one `trueff` is checked for each of the
; eight condition codes ord, uno, one, ueq, gt, ge, ult, and ule.
function %cpuflags_float(f32 [%xmm0]) {
ebb0(v0: f32 [%xmm0]):
; asm: ucomiss %xmm0, %xmm0
[-,%eflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0
jump ebb1
ebb1:
; Every branch below is 2 bytes long and targets ebb1, which begins at the
; first branch, so the 8-bit displacements step down by 2: fe, fc, fa, ...
; asm: jnp ebb1
brff ord v1, ebb1 ; bin: 7b fe
; asm: jp ebb1
brff uno v1, ebb1 ; bin: 7a fc
; asm: jne ebb1
brff one v1, ebb1 ; bin: 75 fa
; asm: je ebb1
brff ueq v1, ebb1 ; bin: 74 f8
; asm: ja ebb1
brff gt v1, ebb1 ; bin: 77 f6
; asm: jae ebb1
brff ge v1, ebb1 ; bin: 73 f4
; asm: jb ebb1
brff ult v1, ebb1 ; bin: 72 f2
; asm: jbe ebb1
brff ule v1, ebb1 ; bin: 76 f0
; The trueff results all land in %rbx, %rdx, %rax, or %rcx, and none of the
; expected byte sequences below carries a prefix before the 0f opcode byte.
; asm: setnp %bl
[-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3
; asm: setp %bl
[-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3
; asm: setne %dl
[-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2
; asm: sete %dl
[-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2
; asm: seta %al
[-,%rax] v14 = trueff gt v1 ; bin: 0f 97 c0
; asm: setae %al
[-,%rax] v15 = trueff ge v1 ; bin: 0f 93 c0
; asm: setb %cl
[-,%rcx] v16 = trueff ult v1 ; bin: 0f 92 c1
; asm: setbe %cl
[-,%rcx] v17 = trueff ule v1 ; bin: 0f 96 c1
return
}

View File

@@ -421,3 +421,61 @@ ebb0:
ebb1:
return
}
; CPU flag instructions.
;
; Encoding test for the integer CPU flags instructions: `ifcmp` writes
; %eflags, `brif` branches on them, and `trueif` turns them into a b1 value.
; One `brif` and one `trueif` is checked for each of the ten integer condition
; codes.
function %cpu_flags() {
ebb0:
[-,%rcx] v1 = iconst.i32 1
[-,%rsi] v2 = iconst.i32 2
jump ebb1
ebb1:
; Both operand orders are checked; v1 lives in %rcx and v2 in %rsi.
; asm: cmpl %esi, %ecx
[-,%eflags] v10 = ifcmp v1, v2 ; bin: 39 f1
; asm: cmpl %ecx, %esi
[-,%eflags] v11 = ifcmp v2, v1 ; bin: 39 ce
; ebb1 begins with the two 2-byte compares above, so the first 2-byte branch
; back to ebb1 has displacement -6 (fa) and each later branch steps down by 2.
; asm: je ebb1
brif eq v11, ebb1 ; bin: 74 fa
; asm: jne ebb1
brif ne v11, ebb1 ; bin: 75 f8
; asm: jl ebb1
brif slt v11, ebb1 ; bin: 7c f6
; asm: jge ebb1
brif sge v11, ebb1 ; bin: 7d f4
; asm: jg ebb1
brif sgt v11, ebb1 ; bin: 7f f2
; asm: jle ebb1
brif sle v11, ebb1 ; bin: 7e f0
; asm: jb ebb1
brif ult v11, ebb1 ; bin: 72 ee
; asm: jae ebb1
brif uge v11, ebb1 ; bin: 73 ec
; asm: ja ebb1
brif ugt v11, ebb1 ; bin: 77 ea
; asm: jbe ebb1
brif ule v11, ebb1 ; bin: 76 e8
; The trueif results alternate between %rbx and %rdx; none of the expected
; byte sequences below carries a prefix before the 0f opcode byte.
; asm: sete %bl
[-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3
; asm: setne %bl
[-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3
; asm: setl %dl
[-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2
; asm: setge %dl
[-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2
; asm: setg %bl
[-,%rbx] v24 = trueif sgt v11 ; bin: 0f 9f c3
; asm: setle %bl
[-,%rbx] v25 = trueif sle v11 ; bin: 0f 9e c3
; asm: setb %dl
[-,%rdx] v26 = trueif ult v11 ; bin: 0f 92 c2
; asm: setae %dl
[-,%rdx] v27 = trueif uge v11 ; bin: 0f 93 c2
; asm: seta %bl
[-,%rbx] v28 = trueif ugt v11 ; bin: 0f 97 c3
; asm: setbe %bl
[-,%rbx] v29 = trueif ule v11 ; bin: 0f 96 c3
return
}

View File

@@ -229,6 +229,13 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2
; asm: ucomiss %xmm10, %xmm5
[-,%eflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea
; asm: ucomiss %xmm5, %xmm10
[-,%eflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5
; asm: ucomiss %xmm5, %xmm5
[-,%eflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
return
}
@@ -452,5 +459,57 @@ ebb0:
; asm: setbe %dl
[-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2
; asm: ucomisd %xmm10, %xmm5
[-,%eflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea
; asm: ucomisd %xmm5, %xmm10
[-,%eflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5
; asm: ucomisd %xmm5, %xmm5
[-,%eflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed
return
}
; Encoding test for the floating point CPU flags instructions:
; `ffcmp` writes %eflags, `brff` branches on them, and `trueff` turns them
; into a b1 value. One `brff` and one `trueff` is checked for each of the
; eight condition codes ord, uno, one, ueq, gt, ge, ult, and ule.
function %cpuflags_float(f32 [%xmm0]) {
ebb0(v0: f32 [%xmm0]):
; asm: ucomiss %xmm0, %xmm0
[-,%eflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0
jump ebb1
ebb1:
; Every branch below is 2 bytes long and targets ebb1, which begins at the
; first branch, so the 8-bit displacements step down by 2: fe, fc, fa, ...
; asm: jnp ebb1
brff ord v1, ebb1 ; bin: 7b fe
; asm: jp ebb1
brff uno v1, ebb1 ; bin: 7a fc
; asm: jne ebb1
brff one v1, ebb1 ; bin: 75 fa
; asm: je ebb1
brff ueq v1, ebb1 ; bin: 74 f8
; asm: ja ebb1
brff gt v1, ebb1 ; bin: 77 f6
; asm: jae ebb1
brff ge v1, ebb1 ; bin: 73 f4
; asm: jb ebb1
brff ult v1, ebb1 ; bin: 72 f2
; asm: jbe ebb1
brff ule v1, ebb1 ; bin: 76 f0
; The first four trueff results go to %rbx/%rdx and are expected without a
; prefix byte; the last four go to %r10/%r14 and are expected with a leading
; 41 byte (REX prefix).
; asm: setnp %bl
[-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3
; asm: setp %bl
[-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3
; asm: setne %dl
[-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2
; asm: sete %dl
[-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2
; asm: seta %r10b
[-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2
; asm: setae %r10b
[-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2
; asm: setb %r14b
[-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6
; asm: setbe %r14b
[-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6
return
}

View File

@@ -495,6 +495,64 @@ ebb2:
jump ebb1 ; bin: eb fd
}
; CPU flag instructions.
;
; Encoding test for the integer CPU flags instructions on i64 operands:
; `ifcmp` writes %eflags, `brif` branches on them, and `trueif` turns them
; into a b1 value. One `brif` and one `trueif` is checked for each of the ten
; integer condition codes.
function %cpu_flags_I64() {
ebb0:
[-,%rcx] v1 = iconst.i64 1
[-,%r10] v2 = iconst.i64 2
jump ebb1
ebb1:
; Both operand orders are checked; v1 lives in %rcx and v2 in %r10, and each
; compare is expected as 3 bytes (a REX prefix byte before the 39 opcode).
; asm: cmpq %r10, %rcx
[-,%eflags] v10 = ifcmp v1, v2 ; bin: 4c 39 d1
; asm: cmpq %rcx, %r10
[-,%eflags] v11 = ifcmp v2, v1 ; bin: 49 39 ca
; ebb1 begins with the two 3-byte compares above, so the first 2-byte branch
; back to ebb1 has displacement -8 (f8) and each later branch steps down by 2.
; asm: je ebb1
brif eq v11, ebb1 ; bin: 74 f8
; asm: jne ebb1
brif ne v11, ebb1 ; bin: 75 f6
; asm: jl ebb1
brif slt v11, ebb1 ; bin: 7c f4
; asm: jge ebb1
brif sge v11, ebb1 ; bin: 7d f2
; asm: jg ebb1
brif sgt v11, ebb1 ; bin: 7f f0
; asm: jle ebb1
brif sle v11, ebb1 ; bin: 7e ee
; asm: jb ebb1
brif ult v11, ebb1 ; bin: 72 ec
; asm: jae ebb1
brif uge v11, ebb1 ; bin: 73 ea
; asm: ja ebb1
brif ugt v11, ebb1 ; bin: 77 e8
; asm: jbe ebb1
brif ule v11, ebb1 ; bin: 76 e6
; The first four trueif results go to %rbx/%rdx and are expected without a
; prefix byte; the remaining six go to %r10/%r14/%r11 and are expected with a
; leading 41 byte (REX prefix).
; asm: sete %bl
[-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3
; asm: setne %bl
[-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3
; asm: setl %dl
[-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2
; asm: setge %dl
[-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2
; asm: setg %r10b
[-,%r10] v24 = trueif sgt v11 ; bin: 41 0f 9f c2
; asm: setle %r10b
[-,%r10] v25 = trueif sle v11 ; bin: 41 0f 9e c2
; asm: setb %r14b
[-,%r14] v26 = trueif ult v11 ; bin: 41 0f 92 c6
; asm: setae %r14b
[-,%r14] v27 = trueif uge v11 ; bin: 41 0f 93 c6
; asm: seta %r11b
[-,%r11] v28 = trueif ugt v11 ; bin: 41 0f 97 c3
; asm: setbe %r11b
[-,%r11] v29 = trueif ule v11 ; bin: 41 0f 96 c3
return
}
; Tests for i32 instructions in 64-bit mode.
;
; Note that many i32 instructions can be encoded both with and without a REX
@@ -860,6 +918,11 @@ ebb0:
; asm: movl 1032(%rsp), %ecx
regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408
; asm: cmpl %esi, %ecx
[-,%eflags] v520 = ifcmp v1, v2 ; bin: 39 f1
; asm: cmpl %r10d, %esi
[-,%eflags] v521 = ifcmp v2, v3 ; bin: 44 39 d6
; asm: testl %ecx, %ecx
; asm: je ebb1x
brz v1, ebb1 ; bin: 85 c9 74 18

View File

@@ -279,10 +279,15 @@ I64.enc(base.x_return, *r.ret(0xc3))
#
# Branches
#
I32.enc(base.jump, *r.jmpb(0xeb))
I32.enc(base.jump, *r.jmpd(0xe9))
I64.enc(base.jump, *r.jmpb(0xeb))
I64.enc(base.jump, *r.jmpd(0xe9))
enc_both(base.jump, r.jmpb, 0xeb)
enc_both(base.jump, r.jmpd, 0xe9)
enc_both(base.brif, r.brib, 0x70)
enc_both(base.brif, r.brid, 0x0f, 0x80)
# Not all float condition codes are legal, see `supported_floatccs`.
enc_both(base.brff, r.brfb, 0x70)
enc_both(base.brff, r.brfd, 0x0f, 0x80)
# Note that the tjccd opcode will be prefixed with 0x0f.
enc_i32_i64(base.brz, r.tjccb, 0x74)
@@ -313,6 +318,14 @@ I64.enc(base.trap, *r.trap(0x0f, 0x0b))
# Comparisons
#
enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
#
# Convert flags to bool.
#
# This encodes `b1` as an 8-bit low register with the value 0 or 1.
enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
#
# Convert bool to int.
@@ -416,3 +429,6 @@ enc_both(base.band_not.f64, r.fax, 0x0f, 0x55)
# handled by legalization patterns.
enc_both(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
enc_both(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
enc_both(base.ffcmp.f32, r.fcmp, 0x0f, 0x2e)
enc_both(base.ffcmp.f64, r.fcmp, 0x66, 0x0f, 0x2e)

View File

@@ -7,10 +7,11 @@ from cdsl.predicates import IsSignedInt, IsEqual, Or
from cdsl.registers import RegClass
from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry
from base.formats import Trap, Call, IndirectCall, Store, Load
from base.formats import IntCompare, FloatCompare
from base.formats import Ternary, Jump, Branch, FuncAddr
from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr
from base.formats import RegMove, RegSpill, RegFill
from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32
from .registers import GPR, ABCD, FPR, GPR8, FPR8, FLAG, StackGPR32, StackFPR32
from .defs import supported_floatccs
try:
@@ -250,6 +251,15 @@ class TailRecipe:
assert name == obj.name, "Mismatched TailRecipe name: " + name
def floatccs(iform):
    # type: (InstructionFormat) -> PredNode
    """
    Build an instruction predicate that is true when `iform.cond` equals one
    of the directly supported floating point condition codes listed in
    `supported_floatccs`.
    """
    # One equality test per supported condition code, OR'ed together.
    alternatives = [IsEqual(iform.cond, cc) for cc in supported_floatccs]
    return Or(*alternatives)
# A null unary instruction that takes a GPR register. Can be used for identity
# copies and no-op conversions.
null = EncRecipe('null', Unary, size=0, ins=GPR, outs=0, emit='')
@@ -754,6 +764,100 @@ jmpd = TailRecipe(
disp4(destination, func, sink);
''')
# Short conditional branch on integer flags.
#
# Uses the BranchInt format, takes %eflags as its only input, and produces no
# result. The emitted opcode is `bits | icc2opc(cond)`, followed by a 1-byte
# displacement to `destination` (disp1).
# NOTE(review): size=1 presumably counts only the displacement byte that
# follows the opcode, and branch_range=8 the displacement width in bits --
# confirm against TailRecipe.
brib = TailRecipe(
'brib', BranchInt, size=1, ins=FLAG.eflags, outs=(),
branch_range=8,
emit='''
PUT_OP(bits | icc2opc(cond), BASE_REX, sink);
disp1(destination, func, sink);
''')
# Long conditional branch on integer flags.
#
# Same shape as `brib` (BranchInt format, %eflags input, no result, opcode
# `bits | icc2opc(cond)`), but the displacement to `destination` is 4 bytes
# (disp4) and the declared branch_range is 32.
brid = TailRecipe(
'brid', BranchInt, size=4, ins=FLAG.eflags, outs=(),
branch_range=32,
emit='''
PUT_OP(bits | icc2opc(cond), BASE_REX, sink);
disp4(destination, func, sink);
''')
# Short conditional branch on floating point flags.
#
# Uses the BranchFloat format, takes %eflags as its only input, and produces
# no result. The instruction predicate `floatccs(BranchFloat)` restricts the
# recipe to the directly supported float condition codes. The emitted opcode
# is `bits | fcc2opc(cond)`, followed by a 1-byte displacement (disp1).
brfb = TailRecipe(
'brfb', BranchFloat, size=1, ins=FLAG.eflags, outs=(),
branch_range=8,
instp=floatccs(BranchFloat),
emit='''
PUT_OP(bits | fcc2opc(cond), BASE_REX, sink);
disp1(destination, func, sink);
''')
# Long conditional branch on floating point flags.
#
# Same shape as `brfb` (BranchFloat format, %eflags input, no result,
# `floatccs(BranchFloat)` predicate, opcode `bits | fcc2opc(cond)`), but the
# displacement to `destination` is 4 bytes (disp4) and the declared
# branch_range is 32.
brfd = TailRecipe(
'brfd', BranchFloat, size=4, ins=FLAG.eflags, outs=(),
branch_range=32,
instp=floatccs(BranchFloat),
emit='''
PUT_OP(bits | fcc2opc(cond), BASE_REX, sink);
disp4(destination, func, sink);
''')
#
# Test flags and set a register.
#
# These setCC instructions only set the low 8 bits, and they can only write
# ABCD registers without a REX prefix.
#
# Other instruction encodings accepting `b1` inputs have the same constraints
# and only look at the low 8 bits of the input register.
#
# Set a register from integer flags, prefixed form.
#
# Uses the IntCond format, reads %eflags, and may write any GPR; it is
# declared with requires_prefix=True. The emitted opcode is
# `bits | icc2opc(cond)`, and the output register is then encoded with
# modrm_r_bits.
seti = TailRecipe(
'seti', IntCond, size=1, ins=FLAG.eflags, outs=GPR,
requires_prefix=True,
emit='''
PUT_OP(bits | icc2opc(cond), rex1(out_reg0), sink);
modrm_r_bits(out_reg0, bits, sink);
''')
# Set a register from integer flags, ABCD-only form.
#
# Identical emit code to `seti`, but the output is restricted to the ABCD
# register class, and `seti` is named as the `when_prefixed` alternative.
# NOTE(review): presumably `seti` is substituted when the encoding carries a
# REX prefix -- confirm against TailRecipe's handling of `when_prefixed`.
seti_abcd = TailRecipe(
'seti_abcd', IntCond, size=1, ins=FLAG.eflags, outs=ABCD,
when_prefixed=seti,
emit='''
PUT_OP(bits | icc2opc(cond), rex1(out_reg0), sink);
modrm_r_bits(out_reg0, bits, sink);
''')
# Set a register from floating point flags, prefixed form.
#
# Uses the FloatCond format, reads %eflags, and may write any GPR; it is
# declared with requires_prefix=True. The emitted opcode is
# `bits | fcc2opc(cond)`, and the output register is then encoded with
# modrm_r_bits.
# NOTE(review): unlike brfb/brfd, no `floatccs` instruction predicate is
# attached here, while fcc2opc panics on unsupported codes -- confirm that
# unsupported condition codes cannot reach this recipe.
setf = TailRecipe(
'setf', FloatCond, size=1, ins=FLAG.eflags, outs=GPR,
requires_prefix=True,
emit='''
PUT_OP(bits | fcc2opc(cond), rex1(out_reg0), sink);
modrm_r_bits(out_reg0, bits, sink);
''')
# Set a register from floating point flags, ABCD-only form.
#
# Identical emit code to `setf`, but the output is restricted to the ABCD
# register class, and `setf` is named as the `when_prefixed` alternative.
# NOTE(review): presumably `setf` is substituted when the encoding carries a
# REX prefix -- confirm against TailRecipe's handling of `when_prefixed`.
setf_abcd = TailRecipe(
'setf_abcd', FloatCond, size=1, ins=FLAG.eflags, outs=ABCD,
when_prefixed=setf,
emit='''
PUT_OP(bits | fcc2opc(cond), rex1(out_reg0), sink);
modrm_r_bits(out_reg0, bits, sink);
''')
#
# Compare and set flags.
#
# XX /r, MR form. Compare two GPR registers and set flags.
#
# Uses the Binary format with two GPR inputs; the only output is %eflags.
# The operands are passed to rex2 and modrm_rr in source order
# (in_reg0, in_reg1).
# NOTE(review): the binemit expectation `39 f1` for `ifcmp v1, v2` with v1 in
# %rcx and v2 in %rsi suggests in_reg0 lands in the ModR/M r/m field and
# in_reg1 in the reg field -- confirm against modrm_rr.
rcmp = TailRecipe(
'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.eflags,
emit='''
PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
modrm_rr(in_reg0, in_reg1, sink);
''')
# XX /r, RM form. Compare two FPR registers and set flags.
#
# Uses the Binary format with two FPR inputs; the only output is %eflags.
# Note the operand order: rex2 and modrm_rr receive (in_reg1, in_reg0), the
# reverse of the MR-form `rcmp` recipe.
fcmp = TailRecipe(
'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.eflags,
emit='''
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rr(in_reg1, in_reg0, sink);
''')
# Test-and-branch.
#
# This recipe represents the macro fusion of a test and a conditional branch.
@@ -926,8 +1030,7 @@ icscc = TailRecipe(
# The omission of a `when_prefixed` alternative is deliberate here.
fcscc = TailRecipe(
'fcscc', FloatCompare, size=1 + 3, ins=(FPR, FPR), outs=ABCD,
instp=Or(*(IsEqual(FloatCompare.cond, cc)
for cc in supported_floatccs)),
instp=floatccs(FloatCompare),
emit='''
// Comparison instruction.
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);

View File

@@ -2,6 +2,7 @@
use binemit::{CodeSink, Reloc, bad_encoding};
use ir::{Function, Inst, Ebb, InstructionData, Opcode};
use ir::condcodes::{IntCC, FloatCC};
use isa::{RegUnit, StackRef, StackBase, StackBaseMask};
use regalloc::RegDiversions;
use super::registers::RU;
@@ -237,6 +238,68 @@ fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
sink.put1(b);
}
/// Map an integer condition code to the condition nibble of an x86 opcode.
///
/// The returned value is meant to be OR'ed into the low 4 bits of one of
/// these base opcodes:
///
/// - `0x70`: short conditional branch.
/// - `0x0f 0x80`: long conditional branch.
/// - `0x0f 0x90`: SetCC.
///
/// The nibbles for overflow (`0x0`, `0x1`), sign (`0x8`, `0x9`), and parity
/// (`0xa`, `0xb`) have no `IntCC` counterpart and are never returned.
fn icc2opc(cond: IntCC) -> u16 {
    use ir::condcodes::IntCC::*;
    match cond {
        // Equality.
        Equal => 0x4,    // *e
        NotEqual => 0x5, // *ne

        // Unsigned orderings.
        UnsignedLessThan => 0x2,           // *b
        UnsignedGreaterThanOrEqual => 0x3, // *ae
        UnsignedLessThanOrEqual => 0x6,    // *be
        UnsignedGreaterThan => 0x7,        // *a

        // Signed orderings.
        SignedLessThan => 0xc,           // *l
        SignedGreaterThanOrEqual => 0xd, // *ge
        SignedLessThanOrEqual => 0xe,    // *le
        SignedGreaterThan => 0xf,        // *g
    }
}
/// Map a floating point condition code to the condition nibble of an x86
/// opcode.
///
/// The ucomiss/ucomisd instructions set the EFLAGS bits ZF/PF/CF (and
/// OF/SF/AF) like this:
///
/// ```text
///     ZPC OSA
/// UN  111 000
/// GT  000 000
/// LT  001 000
/// EQ  100 000
/// ```
///
/// Only the condition codes that are given a nibble below are supported.
///
/// # Panics
///
/// Panics with "`<cond>` not supported" for `Equal`, `NotEqual`, `LessThan`,
/// `LessThanOrEqual`, `UnorderedOrGreaterThan`, and
/// `UnorderedOrGreaterThanOrEqual`.
fn fcc2opc(cond: FloatCC) -> u16 {
    use ir::condcodes::FloatCC::*;
    match cond {
        // No nibble is assigned to these codes.
        Equal                            // EQ
        | NotEqual                       // UN|LT|GT
        | LessThan                       // LT
        | LessThanOrEqual                // LT|EQ
        | UnorderedOrGreaterThan         // UN|GT
        | UnorderedOrGreaterThanOrEqual  // UN|GT|EQ
        => panic!("{} not supported", cond),

        // Supported codes, listed by ascending nibble.
        UnorderedOrLessThan => 0x2,        // UN|LT    => *b  (C=1)
        GreaterThanOrEqual => 0x3,         // GT|EQ    => *ae (C=0)
        UnorderedOrEqual => 0x4,           // UN|EQ    => *e  (Z=1)
        OrderedNotEqual => 0x5,            // LT|GT    => *ne (Z=0)
        UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
        GreaterThan => 0x7,                // GT       => *a  (C=0&Z=0)
        Unordered => 0xa,                  // UN       => *p  (P=1)
        Ordered => 0xb,                    // EQ|LT|GT => *np (P=0)
    }
}
/// Emit a single-byte branch displacement to `destination`.
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);