diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton new file mode 100644 index 0000000000..48e328b455 --- /dev/null +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -0,0 +1,248 @@ +; binary emission of 64-bit code. +test binemit +set is_64bit +isa intel + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64.cton | llvm-mc -show-encoding -triple=x86_64 +; + +; Tests for i64 instructions. +function %I64() { + fn0 = function %foo() + sig0 = signature() + +ebb0: + + ; Integer Constants. + + ; asm: movq $0x01020304f1f2f3f4, %rcx + [-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4 + ; asm: movq $0x11020304f1f2f3f4, %rsi + [-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4 + ; asm: movq $0x21020304f1f2f3f4, %r10 + [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 + ; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant. + [-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122 + ; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant. + [-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122 + + ; Integer Register-Register Operations. 
+ + ; asm: addq %rsi, %rcx + [-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1 + ; asm: addq %r10, %rsi + [-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6 + ; asm: addq %rcx, %r10 + [-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca + + ; asm: subq %rsi, %rcx + [-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1 + ; asm: subq %r10, %rsi + [-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6 + ; asm: subq %rcx, %r10 + [-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca + + ; asm: andq %rsi, %rcx + [-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1 + ; asm: andq %r10, %rsi + [-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6 + ; asm: andq %rcx, %r10 + [-,%r10] v32 = band v3, v1 ; bin: 49 21 ca + + ; asm: orq %rsi, %rcx + [-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1 + ; asm: orq %r10, %rsi + [-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6 + ; asm: orq %rcx, %r10 + [-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca + + ; asm: xorq %rsi, %rcx + [-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1 + ; asm: xorq %r10, %rsi + [-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6 + ; asm: xorq %rcx, %r10 + [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca + + ; asm: movq %rsi, %rcx + [-,%rcx] v60 = copy v2 ; bin: 48 89 f1 + ; asm: movq %r10, %rsi + [-,%rsi] v61 = copy v3 ; bin: 4c 89 d6 + ; asm: movq %rcx, %r10 + [-,%r10] v62 = copy v1 ; bin: 49 89 ca + + + ; Integer Register-Immediate Operations. + ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. + ; Some take 8-bit immediates that are sign-extended to 64 bits. 
+ + ; asm: addq $-100000, %rcx + [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960 + ; asm: addq $100000, %rsi + [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0 + ; asm: addq $0x7fffffff, %r10 + [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff + ; asm: addq $100, %r8 + [-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64 + ; asm: addq $-100, %r14 + [-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c + + ; asm: andq $-100000, %rcx + [-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960 + ; asm: andq $100000, %rsi + [-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0 + ; asm: andq $0x7fffffff, %r10 + [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff + ; asm: andq $100, %r8 + [-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64 + ; asm: andq $-100, %r14 + [-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c + + ; asm: orq $-100000, %rcx + [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960 + ; asm: orq $100000, %rsi + [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0 + ; asm: orq $0x7fffffff, %r10 + [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff + ; asm: orq $100, %r8 + [-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64 + ; asm: orq $-100, %r14 + [-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c + ; asm: ret + + ; asm: xorq $-100000, %rcx + [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960 + ; asm: xorq $100000, %rsi + [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0 + ; asm: xorq $0x7fffffff, %r10 + [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff + ; asm: xorq $100, %r8 + [-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64 + ; asm: xorq $-100, %r14 + [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c + + return ; bin: c3 +} + +; Tests for i32 instructions in 64-bit mode. +; +; Note that many i32 instructions can be encoded both with and without a REX +; prefix if they only use the low 8 registers. 
Here, we are testing the REX +; encodings which are chosen by default. Switching to non-REX encodings should +; be done by an instruction shrinking pass. +function %I32() { + fn0 = function %foo() + sig0 = signature() + +ebb0: + + ; Integer Constants. + + ; asm: movl $0x01020304, %ecx + [-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: 40 b9 01020304 + ; asm: movl $0x11020304, %esi + [-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: 40 be 11020304 + ; asm: movl $0x21020304, %r10d + [-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304 + ; asm: movl $0xff001122, %r8d + [-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122 + ; asm: movl $0x88001122, %r14d + [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 + + ; Integer Register-Register Operations. + + ; asm: addl %esi, %ecx + [-,%rcx] v10 = iadd v1, v2 ; bin: 40 01 f1 + ; asm: addl %r10d, %esi + [-,%rsi] v11 = iadd v2, v3 ; bin: 44 01 d6 + ; asm: addl %ecx, %r10d + [-,%r10] v12 = iadd v3, v1 ; bin: 41 01 ca + + ; asm: subl %esi, %ecx + [-,%rcx] v20 = isub v1, v2 ; bin: 40 29 f1 + ; asm: subl %r10d, %esi + [-,%rsi] v21 = isub v2, v3 ; bin: 44 29 d6 + ; asm: subl %ecx, %r10d + [-,%r10] v22 = isub v3, v1 ; bin: 41 29 ca + + ; asm: andl %esi, %ecx + [-,%rcx] v30 = band v1, v2 ; bin: 40 21 f1 + ; asm: andl %r10d, %esi + [-,%rsi] v31 = band v2, v3 ; bin: 44 21 d6 + ; asm: andl %ecx, %r10d + [-,%r10] v32 = band v3, v1 ; bin: 41 21 ca + + ; asm: orl %esi, %ecx + [-,%rcx] v40 = bor v1, v2 ; bin: 40 09 f1 + ; asm: orl %r10d, %esi + [-,%rsi] v41 = bor v2, v3 ; bin: 44 09 d6 + ; asm: orl %ecx, %r10d + [-,%r10] v42 = bor v3, v1 ; bin: 41 09 ca + + ; asm: xorl %esi, %ecx + [-,%rcx] v50 = bxor v1, v2 ; bin: 40 31 f1 + ; asm: xorl %r10d, %esi + [-,%rsi] v51 = bxor v2, v3 ; bin: 44 31 d6 + ; asm: xorl %ecx, %r10d + [-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca + + ; asm: movl %esi, %ecx + [-,%rcx] v60 = copy v2 ; bin: 40 89 f1 + ; asm: movl %r10d, %esi + [-,%rsi] v61 = copy v3 ; bin: 44 89 d6 + ; asm: movl %ecx, %r10d 
+ [-,%r10] v62 = copy v1 ; bin: 41 89 ca + + + ; Integer Register-Immediate Operations. + ; These 32-bit ops all use a 32-bit immediate. + ; Some take 8-bit immediates that are sign-extended to 32 bits. + + ; asm: addl $-100000, %ecx + [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960 + ; asm: addl $100000, %esi + [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0 + ; asm: addl $0x7fffffff, %r10d + [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff + ; asm: addl $100, %r8d + [-,%r8] v73 = iadd_imm v4, 100 ; bin: 41 83 c0 64 + ; asm: addl $-100, %r14d + [-,%r14] v74 = iadd_imm v5, -100 ; bin: 41 83 c6 9c + + ; asm: andl $-100000, %ecx + [-,%rcx] v80 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960 + ; asm: andl $100000, %esi + [-,%rsi] v81 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0 + ; asm: andl $0x7fffffff, %r10d + [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff + ; asm: andl $100, %r8d + [-,%r8] v83 = band_imm v4, 100 ; bin: 41 83 e0 64 + ; asm: andl $-100, %r14d + [-,%r14] v84 = band_imm v5, -100 ; bin: 41 83 e6 9c + + ; asm: orl $-100000, %ecx + [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960 + ; asm: orl $100000, %esi + [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0 + ; asm: orl $0x7fffffff, %r10d + [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff + ; asm: orl $100, %r8d + [-,%r8] v93 = bor_imm v4, 100 ; bin: 41 83 c8 64 + ; asm: orl $-100, %r14d + [-,%r14] v94 = bor_imm v5, -100 ; bin: 41 83 ce 9c + ; asm: ret + + ; asm: xorl $-100000, %ecx + [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960 + ; asm: xorl $100000, %esi + [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0 + ; asm: xorl $0x7fffffff, %r10d + [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff + ; asm: xorl $100, %r8d + [-,%r8] v103 = bxor_imm v4, 100 ; bin: 41 83 f0 64 + ; asm: xorl $-100, %r14d + [-,%r14] v104 = bxor_imm v5, -100 ; bin:
41 83 f6 9c + + return ; bin: c3 +} diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 0b59b32da3..74dd5b5999 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -2,30 +2,65 @@ Intel Encodings. """ from __future__ import absolute_import +from cdsl.predicates import IsUnsignedInt from base import instructions as base -from .defs import I32 +from base.formats import UnaryImm +from .defs import I32, I64 from . import recipes as r -I32.enc(base.iadd.i32, *r.rr(0x01)) -I32.enc(base.isub.i32, *r.rr(0x29)) +for inst, opc in [ + (base.iadd, 0x01), + (base.isub, 0x29), + (base.band, 0x21), + (base.bor, 0x09), + (base.bxor, 0x31)]: + I32.enc(inst.i32, *r.rr(opc)) -I32.enc(base.band.i32, *r.rr(0x21)) -I32.enc(base.bor.i32, *r.rr(0x09)) -I32.enc(base.bxor.i32, *r.rr(0x31)) + I64.enc(inst.i64, *r.rr.rex(opc, w=1)) + I64.enc(inst.i32, *r.rr.rex(opc)) + # REX-less encoding must come after REX encoding so we don't use it by + # default. Otherwise reg-alloc would never use r8 and up. + I64.enc(inst.i32, *r.rr(opc)) I32.enc(base.copy.i32, *r.ur(0x89)) -# Immediate instructions with sign-extended 8-bit and 32-bit immediate. -for inst, rrr in [ - (base.iadd_imm.i32, 0), - (base.band_imm.i32, 4), - (base.bor_imm.i32, 1), - (base.bxor_imm.i32, 6)]: - I32.enc(inst, *r.rib(0x83, rrr=rrr)) - I32.enc(inst, *r.rid(0x81, rrr=rrr)) +I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1)) +I64.enc(base.copy.i32, *r.ur.rex(0x89)) +I64.enc(base.copy.i32, *r.ur(0x89)) -# Immediate constant. -I32.enc(base.iconst.i32, *r.uid(0xb8)) +# Immediate instructions with sign-extended 8-bit and 32-bit immediate. 
+for inst, rrr in [ + (base.iadd_imm, 0), + (base.band_imm, 4), + (base.bor_imm, 1), + (base.bxor_imm, 6)]: + I32.enc(inst.i32, *r.rib(0x83, rrr=rrr)) + I32.enc(inst.i32, *r.rid(0x81, rrr=rrr)) + + I64.enc(inst.i64, *r.rib.rex(0x83, rrr=rrr, w=1)) + I64.enc(inst.i64, *r.rid.rex(0x81, rrr=rrr, w=1)) + I64.enc(inst.i32, *r.rib.rex(0x83, rrr=rrr)) + I64.enc(inst.i32, *r.rid.rex(0x81, rrr=rrr)) + I64.enc(inst.i32, *r.rib(0x83, rrr=rrr)) + I64.enc(inst.i32, *r.rid(0x81, rrr=rrr)) + +# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as +# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks. + +# Immediate constants. +I32.enc(base.iconst.i32, *r.puid(0xb8)) + +I64.enc(base.iconst.i32, *r.puid.rex(0xb8)) +I64.enc(base.iconst.i32, *r.puid(0xb8)) +# The 32-bit immediate movl also zero-extends to 64 bits. +I64.enc(base.iconst.i64, *r.puid.rex(0xb8), + instp=IsUnsignedInt(UnaryImm.imm, 32)) +I64.enc(base.iconst.i64, *r.puid(0xb8), + instp=IsUnsignedInt(UnaryImm.imm, 32)) +# Sign-extended 32-bit immediate. +I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) +# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. +I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) # 32-bit shifts and rotates. 
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit @@ -73,3 +108,4 @@ I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe)) I32.enc(base.call, *r.call_id(0xe8)) I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2)) I32.enc(base.x_return, *r.ret(0xc3)) +I64.enc(base.x_return, *r.ret(0xc3)) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index ebeacc5968..2d1fc62d80 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -160,6 +160,33 @@ class TailRecipe: emit=replace_put_op(self.emit, name)) return (self.recipes[name], bits) + def rex(self, *ops, **kwargs): + # type: (*int, **int) -> Tuple[EncRecipe, int] + """ + Create a REX encoding recipe and encoding bits for the opcode bytes in + `ops`. + + The recipe will always generate a REX prefix, whether it is required or + not. For instructions that don't require a REX prefix, two encodings + should be added: One with REX and one without. + """ + rrr = kwargs.get('rrr', 0) + w = kwargs.get('w', 0) + name, bits = decode_ops(ops, rrr, w) + name = 'Rex' + name + if name not in self.recipes: + self.recipes[name] = EncRecipe( + name + self.name, + self.format, + 1 + len(ops) + self.size, + ins=self.ins, + outs=self.outs, + branch_range=self.branch_range, + instp=self.instp, + isap=self.isap, + emit=replace_put_op(self.emit, name)) + return (self.recipes[name], bits) + # XX /r rr = TailRecipe( @@ -208,11 +235,21 @@ rid = TailRecipe( sink.put4(imm as u32); ''') -# XX+rd id unary with 32-bit immediate. +# XX /n id with 32-bit immediate sign-extended. UnaryImm version. uid = TailRecipe( - 'uid', UnaryImm, size=4, ins=(), outs=GPR, + 'uid', UnaryImm, size=5, ins=(), outs=GPR, instp=IsSignedInt(UnaryImm.imm, 32), emit=''' + PUT_OP(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + ''') + +# XX+rd id unary with 32-bit immediate. 
Note no recipe predicate. +puid = TailRecipe( + 'puid', UnaryImm, size=4, ins=(), outs=GPR, + emit=''' // The destination register is encoded in the low bits of the opcode. // No ModR/M. PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); @@ -220,6 +257,15 @@ uid = TailRecipe( sink.put4(imm as u32); ''') +# XX+rd iq unary with 64-bit immediate. +puiq = TailRecipe( + 'puiq', UnaryImm, size=8, ins=(), outs=GPR, + emit=''' + PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + ''') + # # Store recipes. # diff --git a/lib/cretonne/src/isa/intel/binemit.rs b/lib/cretonne/src/isa/intel/binemit.rs index e2cb5396bc..9db0ee1234 100644 --- a/lib/cretonne/src/isa/intel/binemit.rs +++ b/lib/cretonne/src/isa/intel/binemit.rs @@ -44,6 +44,16 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { BASE_REX | b | (r << 2) } +// Emit a REX prefix. +// +// The R, X, and B bits are computed from registers using the functions above. The W bit is +// extracted from `bits`. +fn rex_prefix(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(rex & 0xf8, BASE_REX); + let w = ((bits >> 15) & 1) as u8; + sink.put1(rex | (w << 3)); +} + // Emit a single-byte opcode with no REX prefix. fn put_op1(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); @@ -51,6 +61,13 @@ fn put_op1(bits: u16, rex: u8, sink: &mut CS) { sink.put1(bits as u8); } +// Emit a single-byte opcode preceded by a REX prefix (W taken from bit 15 of `bits`). +fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*"); + rex_prefix(bits, rex, sink); + sink.put1(bits as u8); +} + // Emit two-byte opcode: 0F XX fn put_op2(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");