diff --git a/cranelift/filetests/isa/x86/binary64-pic.cton b/cranelift/filetests/isa/x86/binary64-pic.cton index f3b838795a..cc592e0188 100644 --- a/cranelift/filetests/isa/x86/binary64-pic.cton +++ b/cranelift/filetests/isa/x86/binary64-pic.cton @@ -28,18 +28,17 @@ function %I64() { ebb0: - ; asm: call foo@PLT - call fn0() ; bin: e8 PLTRel4(%foo-4) 00000000 + ; Colocated functions. ; asm: call foo call fn1() ; bin: e8 PCRel4(%bar-4) 00000000 - ; asm: mov 0x0(%rip), %rax - [-,%rax] v0 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v1 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v2 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 + ; asm: lea 0x0(%rip), %rax + [-,%rax] v0 = func_addr.i64 fn1 ; bin: 48 8d 05 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %rsi + [-,%rsi] v1 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %r10 + [-,%r10] v2 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 ; asm: call *%rax call_indirect sig0, v0() ; bin: ff d0 @@ -48,6 +47,25 @@ ebb0: ; asm: call *%r10 call_indirect sig0, v2() ; bin: 41 ff d2 + ; Non-colocated functions. + + ; asm: call foo@PLT + call fn0() ; bin: e8 PLTRel4(%foo-4) 00000000 + + ; asm: mov 0x0(%rip), %rax + [-,%rax] v100 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 + ; asm: mov 0x0(%rip), %rsi + [-,%rsi] v101 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 + ; asm: mov 0x0(%rip), %r10 + [-,%r10] v102 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 + + ; asm: call *%rax + call_indirect sig0, v100() ; bin: ff d0 + ; asm: call *%rsi + call_indirect sig0, v101() ; bin: ff d6 + ; asm: call *%r10 + call_indirect sig0, v102() ; bin: 41 ff d2 + ; asm: mov 0x0(%rip), %rcx [-,%rcx] v3 = globalsym_addr.i64 gv0 ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000 ; asm: mov 0x0(%rip), %rsi diff --git a/cranelift/filetests/isa/x86/binary64.cton b/cranelift/filetests/isa/x86/binary64.cton index 6ecc086231..faf4fe99ac 100644 --- a/cranelift/filetests/isa/x86/binary64.cton +++ b/cranelift/filetests/isa/x86/binary64.cton @@ -13,6 +13,7 @@ isa x86 haswell function %I64() { sig0 = () fn0 = %foo() + fn1 = colocated %bar() gv0 = globalsym %some_gv @@ -473,12 +474,17 @@ ebb0: ; asm: movzbq %dl, %rsi [-,%rsi] v351 = bint.i64 v301 ; bin: 0f b6 f2 - ; asm: movabsq $0, %rcx - [-,%rcx] v400 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %rsi - [-,%rsi] v401 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %r10 - [-,%r10] v402 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 + ; Colocated functions. + + ; asm: call bar + ; call fn1() ; bin: e8 PCRel4(%bar-4) 00000000 + + ; asm: lea 0x0(%rip), %rcx + [-,%rcx] v400 = func_addr.i64 fn1 ; bin: 48 8d 0d PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %rsi + [-,%rsi] v401 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %r10 + [-,%r10] v402 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 ; asm: call *%rcx call_indirect sig0, v400() ; bin: ff d1 @@ -487,6 +493,22 @@ ebb0: ; asm: call *%r10 call_indirect sig0, v402() ; bin: 41 ff d2 + ; Non-colocated functions. Note that there is no non-colocated non-PIC call. + + ; asm: movabsq $0, %rcx + [-,%rcx] v410 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 + ; asm: movabsq $0, %rsi + [-,%rsi] v411 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 + ; asm: movabsq $0, %r10 + [-,%r10] v412 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 + + ; asm: call *%rcx + call_indirect sig0, v410() ; bin: ff d1 + ; asm: call *%rsi + call_indirect sig0, v411() ; bin: ff d6 + ; asm: call *%r10 + call_indirect sig0, v412() ; bin: 41 ff d2 + ; asm: movabsq $-1, %rcx [-,%rcx] v450 = globalsym_addr.i64 gv0 ; bin: 48 b9 Abs8(%some_gv) 0000000000000000 ; asm: movabsq $-1, %rsi @@ -548,9 +570,9 @@ ebb0: [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 ; asm: sarq $33, %r8 [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 - ; asm: shrl $62, %rsi + ; asm: shrq $62, %rsi [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e - ; asm: shrl $63, %r8 + ; asm: shrq $63, %r8 [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f @@ -1040,7 +1062,7 @@ ebb0: ; asm: setl %bl [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3 - ; asm: cmpq $100000, %ecx + ; asm: cmpl $100000, %ecx ; asm: setl %bl [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3 diff --git a/lib/cretonne/meta/isa/x86/encodings.py b/lib/cretonne/meta/isa/x86/encodings.py index c120fe1a58..5ab6636a44 100644 --- a/lib/cretonne/meta/isa/x86/encodings.py +++ b/lib/cretonne/meta/isa/x86/encodings.py @@ -305,10 +305,12 @@ X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8), X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1), isap=And(allones_funcaddrs, Not(is_pic))) -# PIC +# 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's +# pc-relative field. X86_64.enc(base.func_addr.i64, *r.pcrel_fnaddr8.rex(0x8d, w=1), - isap=is_pic, instp=IsColocatedFunc(FuncAddr.func_ref)) + +# 64-bit, non-colocated, PIC. X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1), isap=is_pic) @@ -338,12 +340,14 @@ X86_64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1), # 32-bit, both PIC and non-PIC. X86_32.enc(base.call, *r.call_id(0xe8)) -# 64-bit, PIC, colocated and non-colocated. There is no 64-bit non-PIC, since -# non-PIC is currently using the large model, which requires calls be lowered -# to func_addr+call_indirect. +# 64-bit, colocated, both PIC and non-PIC. Use the call instruction's +# pc-relative field. X86_64.enc(base.call, *r.call_id(0xe8), - isap=is_pic, instp=IsColocatedFunc(Call.func_ref)) + +# 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, +# since non-PIC is currently using the large model, which requires calls be +# lowered to func_addr+call_indirect. X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic) X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))