Stack overflow checking with stack probes.

This adds a libcall name, a calling convention, and settings for
emitting stack probes, and implements them for x86 system_v ABIs.
This commit is contained in:
Dan Gohman
2018-04-20 21:41:45 -07:00
parent c5b15c2396
commit 3b1d805758
20 changed files with 585 additions and 155 deletions

View File

@@ -392,19 +392,37 @@ ebb0:
; asm: popl %ecx ; asm: popl %ecx
[-,%rcx] v512 = x86_pop.i32 ; bin: 59 [-,%rcx] v512 = x86_pop.i32 ; bin: 59
; Adjust Stack Pointer ; Adjust Stack Pointer Up
; asm: addl $64, %esp ; asm: addl $64, %esp
adjust_sp_imm 64 ; bin: 83 c4 40 adjust_sp_up_imm 64 ; bin: 83 c4 40
; asm: addl $-64, %esp ; asm: addl $-64, %esp
adjust_sp_imm -64 ; bin: 83 c4 c0 adjust_sp_up_imm -64 ; bin: 83 c4 c0
; asm: addl $1024, %esp ; asm: addl $1024, %esp
adjust_sp_imm 1024 ; bin: 81 c4 00000400 adjust_sp_up_imm 1024 ; bin: 81 c4 00000400
; asm: addl $-1024, %esp ; asm: addl $-1024, %esp
adjust_sp_imm -1024 ; bin: 81 c4 fffffc00 adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00
; asm: addl $2147483647, %esp ; asm: addl $2147483647, %esp
adjust_sp_imm 2147483647 ; bin: 81 c4 7fffffff adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff
; asm: addl $-2147483648, %esp ; asm: addl $-2147483648, %esp
adjust_sp_imm -2147483648 ; bin: 81 c4 80000000 adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000
; Adjust Stack Pointer Down
; asm: subl %ecx, %esp
adjust_sp_down v1 ; bin: 29 cc
; asm: subl %esi, %esp
adjust_sp_down v2 ; bin: 29 f4
; asm: subl $64, %esp
adjust_sp_down_imm 64 ; bin: 83 ec 40
; asm: subl $-64, %esp
adjust_sp_down_imm -64 ; bin: 83 ec c0
; asm: subl $1024, %esp
adjust_sp_down_imm 1024 ; bin: 81 ec 00000400
; asm: subl $-1024, %esp
adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00
; asm: subl $2147483647, %esp
adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff
; asm: subl $-2147483648, %esp
adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000
; Shift immediates ; Shift immediates
; asm: shll $2, %esi ; asm: shll $2, %esi

View File

@@ -547,19 +547,37 @@ ebb0:
; asm: popq %r10 ; asm: popq %r10
[-,%r10] v514 = x86_pop.i64 ; bin: 41 5a [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a
; Adjust Stack Pointer ; Adjust Stack Pointer Up
; asm: addq $64, %rsp ; asm: addq $64, %rsp
adjust_sp_imm 64 ; bin: 48 83 c4 40 adjust_sp_up_imm 64 ; bin: 48 83 c4 40
; asm: addq $-64, %rsp ; asm: addq $-64, %rsp
adjust_sp_imm -64 ; bin: 48 83 c4 c0 adjust_sp_up_imm -64 ; bin: 48 83 c4 c0
; asm: addq $1024, %rsp ; asm: addq $1024, %rsp
adjust_sp_imm 1024 ; bin: 48 81 c4 00000400 adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400
; asm: addq $-1024, %rsp ; asm: addq $-1024, %rsp
adjust_sp_imm -1024 ; bin: 48 81 c4 fffffc00 adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00
; asm: addq $2147483647, %rsp ; asm: addq $2147483647, %rsp
adjust_sp_imm 2147483647 ; bin: 48 81 c4 7fffffff adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff
; asm: addq $-2147483648, %rsp ; asm: addq $-2147483648, %rsp
adjust_sp_imm -2147483648 ; bin: 48 81 c4 80000000 adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000
; Adjust Stack Pointer Down
; asm: subq %rcx, %rsp
adjust_sp_down v1 ; bin: 48 29 cc
; asm: subq %r10, %rsp
adjust_sp_down v3 ; bin: 4c 29 d4
; asm: subq $64, %rsp
adjust_sp_down_imm 64 ; bin: 48 83 ec 40
; asm: subq $-64, %rsp
adjust_sp_down_imm -64 ; bin: 48 83 ec c0
; asm: subq $1024, %rsp
adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400
; asm: subq $-1024, %rsp
adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00
; asm: subq $2147483647, %rsp
adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff
; asm: subq $-2147483648, %rsp
adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000
; Shift immediates ; Shift immediates
; asm: shlq $12, %rsi ; asm: shlq $12, %rsi

View File

@@ -0,0 +1,29 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_func_adjusts_sp=1
isa x86
; Like %big in probestack.cton, but with the probestack function adjusting
; the stack pointer itself.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0
; nextln: [Op1call_id#e8] call fn0(v1)
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64
; nextln: [Op1ret#c3] return v2
; nextln: }

View File

@@ -0,0 +1,25 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_enabled=0
isa x86
; Like %big in probestack.cton, but with probes disabled.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }

View File

@@ -0,0 +1,28 @@
test compile
set is_64bit=1
isa x86
; Like %big in probestack.cton, but without a colocated libcall.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0
; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0
; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64
; nextln: [Op1ret#c3] return v4
; nextln: }

View File

@@ -0,0 +1,75 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_size_log2=13
isa x86
; Like %big in probestack.cton, but now the probestack size is bigger
; and it no longer needs a probe.
function %big() system_v {
ss0 = explicit_slot 4097
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4097, offset -4113
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }
; Like %big; still doesn't need a probe.
function %bigger() system_v {
ss0 = explicit_slot 8192
ebb0:
return
}
; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 8192, offset -8208
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }
; Like %bigger; this needs a probe.
function %biggest() system_v {
ss0 = explicit_slot 8193
ebb0:
return
}
; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 8193, offset -8209
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208
; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208
; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64
; nextln: [Op1ret#c3] return v3
; nextln: }

View File

@@ -0,0 +1,50 @@
test compile
set is_64bit=1
set colocated_libcalls=1
isa x86
; A function with a big stack frame. This should have a stack probe.
function %big() system_v {
ss0 = explicit_slot 4097
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4097, offset -4113
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112
; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112
; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64
; nextln: [Op1ret#c3] return v3
; nextln: }
; A function with a small enough stack frame. This shouldn't have a stack probe.
function %small() system_v {
ss0 = explicit_slot 4096
ebb0:
return
}
; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4096, offset -4112
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }

View File

@@ -36,8 +36,8 @@ ebb0:
; nextln: ebb0(v0: i64 [%rbp]): ; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0 ; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp ; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_imm -176 ; nextln: adjust_sp_down_imm 176
; nextln: adjust_sp_imm 176 ; nextln: adjust_sp_up_imm 176
; nextln: v1 = x86_pop.i64 ; nextln: v1 = x86_pop.i64
; nextln: return v1 ; nextln: return v1
; nextln: } ; nextln: }
@@ -109,7 +109,7 @@ ebb0(v0: i64, v1: i64):
; nextln: x86_push v18 ; nextln: x86_push v18
; nextln: x86_push v19 ; nextln: x86_push v19
; nextln: x86_push v20 ; nextln: x86_push v20
; nextln: adjust_sp_imm -8 ; nextln: adjust_sp_down_imm 8
; nextln: v2 = load.i32 v0 ; nextln: v2 = load.i32 v0
; nextln: v3 = load.i32 v0+8 ; nextln: v3 = load.i32 v0+8
; nextln: v4 = load.i32 v0+16 ; nextln: v4 = load.i32 v0+16
@@ -136,7 +136,7 @@ ebb0(v0: i64, v1: i64):
; nextln: store v12, v1+80 ; nextln: store v12, v1+80
; nextln: store v13, v1+88 ; nextln: store v13, v1+88
; nextln: store v14, v1+96 ; nextln: store v14, v1+96
; nextln: adjust_sp_imm 8 ; nextln: adjust_sp_up_imm 8
; nextln: v26 = x86_pop.i64 ; nextln: v26 = x86_pop.i64
; nextln: v25 = x86_pop.i64 ; nextln: v25 = x86_pop.i64
; nextln: v24 = x86_pop.i64 ; nextln: v24 = x86_pop.i64
@@ -192,13 +192,13 @@ ebb0(v0: i64, v1: i64):
; nextln: x86_push v51 ; nextln: x86_push v51
; nextln: x86_push v52 ; nextln: x86_push v52
; nextln: x86_push v53 ; nextln: x86_push v53
; nextln: adjust_sp_imm ; nextln: adjust_sp_down_imm
; check: spill ; check: spill
; check: fill ; check: fill
; check: adjust_sp_imm ; check: adjust_sp_up_imm
; nextln: v59 = x86_pop.i64 ; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64 ; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64 ; nextln: v57 = x86_pop.i64

View File

@@ -10,7 +10,7 @@ ebb0(v0: i32, v1: i32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp ; sameln: function %br_icmp
@@ -34,7 +34,7 @@ ebb0(v0: i32, v1: i32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp_inverse ; sameln: function %br_icmp_inverse
@@ -53,12 +53,12 @@ ebb1:
function %br_icmp_imm(i32, i32) -> i32 { function %br_icmp_imm(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32): ebb0(v0: i32, v1: i32):
[Op1icsccib#7083] v2 = icmp_imm slt v0, 2 [Op1icscc_ib#7083] v2 = icmp_imm slt v0, 2
[Op1t8jccd_long#84] brz v2, ebb1 [Op1t8jccd_long#84] brz v2, ebb1
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp_imm ; sameln: function %br_icmp_imm
@@ -82,7 +82,7 @@ ebb0(v0: f32, v1: f32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v18 = iconst.i32 0x40a8_0000 [Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000
[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 [Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }

View File

@@ -7,8 +7,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000 heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000
ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64):
@0001 [RexOp1puid#b8] v5 = iconst.i32 0 @0001 [RexOp1pu_id#b8] v5 = iconst.i32 0
@0003 [RexOp1puid#b8] v6 = iconst.i32 0 @0003 [RexOp1pu_id#b8] v6 = iconst.i32 0
@0005 [RexOp1tjccb#74] brz v6, ebb10 @0005 [RexOp1tjccb#74] brz v6, ebb10
@0007 [RexOp1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) @0007 [RexOp1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3)
@@ -16,10 +16,10 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@000b [RexOp1jmpb#eb] jump ebb6 @000b [RexOp1jmpb#eb] jump ebb6
ebb6: ebb6:
@000d [RexOp1puid#b8] v8 = iconst.i32 0 @000d [RexOp1pu_id#b8] v8 = iconst.i32 0
@000f [RexOp1tjccb#75] brnz v8, ebb5 @000f [RexOp1tjccb#75] brnz v8, ebb5
@0011 [RexOp1puid#b8] v9 = iconst.i32 0 @0011 [RexOp1pu_id#b8] v9 = iconst.i32 0
@0015 [RexOp1puid#b8] v11 = iconst.i32 0 @0015 [RexOp1pu_id#b8] v11 = iconst.i32 0
@0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11 @0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11
@0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12 @0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12
@001a [RexOp1rr#21] v14 = band v9, v13 @001a [RexOp1rr#21] v14 = band v9, v13
@@ -28,11 +28,11 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
ebb7: ebb7:
@0020 [RexOp1tjccb#74] brz.i32 v17, ebb8 @0020 [RexOp1tjccb#74] brz.i32 v17, ebb8
@0022 [RexOp1puid#b8] v18 = iconst.i32 0 @0022 [RexOp1pu_id#b8] v18 = iconst.i32 0
@0024 [RexOp1tjccb#74] brz v18, ebb9 @0024 [RexOp1tjccb#74] brz v18, ebb9
@0028 [RexOp1puid#b8] v21 = iconst.i32 0 @0028 [RexOp1pu_id#b8] v21 = iconst.i32 0
@002a [RexOp1umr#89] v79 = uextend.i64 v5 @002a [RexOp1umr#89] v79 = uextend.i64 v5
@002a [RexOp1rib#8083] v80 = iadd_imm.i64 v4, 0 @002a [RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0
@002a [RexOp1ld#808b] v81 = load.i64 v80 @002a [RexOp1ld#808b] v81 = load.i64 v80
@002a [RexOp1rr#8001] v22 = iadd v81, v79 @002a [RexOp1rr#8001] v22 = iadd v81, v79
@002a [RexMp1st#189] istore16 v21, v22 @002a [RexMp1st#189] istore16 v21, v22
@@ -42,8 +42,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@002e [RexOp1jmpb#eb] jump ebb8 @002e [RexOp1jmpb#eb] jump ebb8
ebb8: ebb8:
@0033 [RexOp1puid#b8] v27 = iconst.i32 3 @0033 [RexOp1pu_id#b8] v27 = iconst.i32 3
@0035 [RexOp1puid#b8] v28 = iconst.i32 4 @0035 [RexOp1pu_id#b8] v28 = iconst.i32 4
@003b [RexOp1rr#09] v35 = bor.i32 v31, v13 @003b [RexOp1rr#09] v35 = bor.i32 v31, v13
@003c [RexOp1tjccb#75] brnz v35, ebb15(v27) @003c [RexOp1tjccb#75] brnz v35, ebb15(v27)
@003c [RexOp1jmpb#eb] jump ebb15(v28) @003c [RexOp1jmpb#eb] jump ebb15(v28)
@@ -58,25 +58,25 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@0045 [RexOp1jmpb#eb] jump ebb2(v40, v47, v54, v61, v68, v75) @0045 [RexOp1jmpb#eb] jump ebb2(v40, v47, v54, v61, v68, v75)
ebb10: ebb10:
@0046 [RexOp1puid#b8] v43 = iconst.i32 0 @0046 [RexOp1pu_id#b8] v43 = iconst.i32 0
@0048 [RexOp1jmpb#eb] jump ebb2(v43, v5, v0, v1, v2, v3) @0048 [RexOp1jmpb#eb] jump ebb2(v43, v5, v0, v1, v2, v3)
ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32):
@004c [RexOp1puid#b8] v44 = iconst.i32 0 @004c [RexOp1pu_id#b8] v44 = iconst.i32 0
@004e [RexOp1tjccb#74] brz v44, ebb12 @004e [RexOp1tjccb#74] brz v44, ebb12
@0052 [RexOp1puid#b8] v50 = iconst.i32 11 @0052 [RexOp1pu_id#b8] v50 = iconst.i32 11
@0054 [RexOp1tjccb#74] brz v50, ebb14 @0054 [RexOp1tjccb#74] brz v50, ebb14
@0058 [RexOp1umr#89] v82 = uextend.i64 v52 @0058 [RexOp1umr#89] v82 = uextend.i64 v52
@0058 [RexOp1rib#8083] v83 = iadd_imm.i64 v4, 0 @0058 [RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0
@0058 [RexOp1ld#808b] v84 = load.i64 v83 @0058 [RexOp1ld#808b] v84 = load.i64 v83
@0058 [RexOp1rr#8001] v57 = iadd v84, v82 @0058 [RexOp1rr#8001] v57 = iadd v84, v82
@0058 [RexOp1ld#8b] v58 = load.i32 v57 @0058 [RexOp1ld#8b] v58 = load.i32 v57
@005d [RexOp1umr#89] v85 = uextend.i64 v58 @005d [RexOp1umr#89] v85 = uextend.i64 v58
@005d [RexOp1rib#8083] v86 = iadd_imm.i64 v4, 0 @005d [RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0
@005d [RexOp1ld#808b] v87 = load.i64 v86 @005d [RexOp1ld#808b] v87 = load.i64 v86
@005d [RexOp1rr#8001] v64 = iadd v87, v85 @005d [RexOp1rr#8001] v64 = iadd v87, v85
@005d [RexOp1st#88] istore8 v59, v64 @005d [RexOp1st#88] istore8 v59, v64
@0060 [RexOp1puid#b8] v65 = iconst.i32 0 @0060 [RexOp1pu_id#b8] v65 = iconst.i32 0
@0062 [RexOp1jmpb#eb] jump ebb13(v65) @0062 [RexOp1jmpb#eb] jump ebb13(v65)
ebb14: ebb14:
@@ -84,7 +84,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
ebb13(v51: i32): ebb13(v51: i32):
@0066 [RexOp1umr#89] v88 = uextend.i64 v45 @0066 [RexOp1umr#89] v88 = uextend.i64 v45
@0066 [RexOp1rib#8083] v89 = iadd_imm.i64 v4, 0 @0066 [RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0
@0066 [RexOp1ld#808b] v90 = load.i64 v89 @0066 [RexOp1ld#808b] v90 = load.i64 v89
@0066 [RexOp1rr#8001] v71 = iadd v90, v88 @0066 [RexOp1rr#8001] v71 = iadd v90, v88
@0066 [RexOp1st#89] store v51, v71 @0066 [RexOp1st#89] store v51, v71

View File

@@ -2,6 +2,7 @@
test compile test compile
set is_64bit set is_64bit
set probestack_enabled=0
isa x86 haswell isa x86 haswell
; This function contains unreachable blocks which trip up the register ; This function contains unreachable blocks which trip up the register

View File

@@ -591,12 +591,25 @@ stack_check = Instruction(
The global variable must be accessible and naturally aligned for a The global variable must be accessible and naturally aligned for a
pointer-sized value. pointer-sized value.
`stack_check` is an alternative way to detect stack overflow, when using
a calling convention that doesn't perform stack probes.
""", """,
ins=GV, can_trap=True) ins=GV, can_trap=True)
delta = Operand('delta', Int)
adjust_sp_down = Instruction(
'adjust_sp_down', r"""
Subtracts ``delta`` offset value from the stack pointer register.
This instruction is used to adjust the stack pointer by a dynamic amount.
""",
ins=(delta,),
other_side_effects=True)
StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
adjust_sp_imm = Instruction( adjust_sp_up_imm = Instruction(
'adjust_sp_imm', r""" 'adjust_sp_up_imm', r"""
Adds ``Offset`` immediate offset value to the stack pointer register. Adds ``Offset`` immediate offset value to the stack pointer register.
This instruction is used to adjust the stack pointer, primarily in function This instruction is used to adjust the stack pointer, primarily in function
@@ -606,6 +619,19 @@ adjust_sp_imm = Instruction(
ins=(StackOffset,), ins=(StackOffset,),
other_side_effects=True) other_side_effects=True)
StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
adjust_sp_down_imm = Instruction(
'adjust_sp_down_imm', r"""
Subtracts ``Offset`` immediate offset value from the stack pointer
register.
This instruction is used to adjust the stack pointer, primarily in function
prologues and epilogues. ``Offset`` is constrained to the size of a signed
32-bit integer.
""",
ins=(StackOffset,),
other_side_effects=True)
f = Operand('f', iflags) f = Operand('f', iflags)
ifcmp_sp = Instruction( ifcmp_sp = Instruction(

View File

@@ -38,17 +38,27 @@ call_conv = EnumSetting(
- system_v: System V-style convention used on many platforms - system_v: System V-style convention used on many platforms
- fastcall: Windows "fastcall" convention, also used for x64 and ARM - fastcall: Windows "fastcall" convention, also used for x64 and ARM
- baldrdash: SpiderMonkey WebAssembly convention - baldrdash: SpiderMonkey WebAssembly convention
- probestack: specialized convention for the probestack function
The default calling convention may be overridden by individual The default calling convention may be overridden by individual
functions. functions.
""", """,
'fast', 'cold', 'system_v', 'fastcall', 'baldrdash') 'fast', 'cold', 'system_v', 'fastcall', 'baldrdash', 'probestack')
# Note that Cretonne doesn't currently need an is_pie flag, because PIE is just # Note that Cretonne doesn't currently need an is_pie flag, because PIE is just
# PIC where symbols can't be pre-empted, which can be expressed with the # PIC where symbols can't be pre-empted, which can be expressed with the
# `colocated` flag on external functions and global variables. # `colocated` flag on external functions and global variables.
is_pic = BoolSetting("Enable Position-Independent Code generation") is_pic = BoolSetting("Enable Position-Independent Code generation")
colocated_libcalls = BoolSetting(
"""
Use colocated libcalls.
Generate code that assumes that libcalls can be declared "colocated",
meaning they will be defined along with the current function, such that
they can use more efficient addressing.
""")
return_at_end = BoolSetting( return_at_end = BoolSetting(
""" """
Generate functions with at most a single return instruction at the Generate functions with at most a single return instruction at the
@@ -115,4 +125,31 @@ allones_funcaddrs = BoolSetting(
Emit not-yet-relocated function addresses as all-ones bit patterns. Emit not-yet-relocated function addresses as all-ones bit patterns.
""") """)
#
# Stack probing options.
#
probestack_enabled = BoolSetting(
"""
Enable the use of stack probes, for calling conventions which support
this functionality.
""",
default=True)
probestack_func_adjusts_sp = BoolSetting(
"""
    Set this to true if the stack probe function modifies the stack pointer
    itself.
""")
probestack_size_log2 = NumSetting(
"""
The log2 of the size of the stack guard region.
Stack frames larger than this size will have stack overflow checked
by calling the probestack function.
The default is 12, which translates to a size of 4096.
""",
default=12)
group.close(globals()) group.close(globals())

View File

@@ -136,29 +136,29 @@ for inst, rrr in [
(base.band_imm, 4), (base.band_imm, 4),
(base.bor_imm, 1), (base.bor_imm, 1),
(base.bxor_imm, 6)]: (base.bxor_imm, 6)]:
enc_i32_i64(inst, r.rib, 0x83, rrr=rrr) enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
enc_i32_i64(inst, r.rid, 0x81, rrr=rrr) enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as # TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks. # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants. # Immediate constants.
X86_32.enc(base.iconst.i32, *r.puid(0xb8)) X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))
X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8)) X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
X86_64.enc(base.iconst.i32, *r.puid(0xb8)) X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits. # The 32-bit immediate movl also zero-extends to 64 bits.
X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8), X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32)) instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.puid(0xb8), X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32)) instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate. # Sign-extended 32-bit immediate.
X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))
# bool constants. # bool constants.
enc_both(base.bconst.b1, r.puid_bool, 0xb8) enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)
# Shifts and rotates. # Shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -180,7 +180,7 @@ for inst, rrr in [
(base.ishl_imm, 4), (base.ishl_imm, 4),
(base.ushr_imm, 5), (base.ushr_imm, 5),
(base.sshr_imm, 7)]: (base.sshr_imm, 7)]:
enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr) enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)
# Population count. # Population count.
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
@@ -254,11 +254,21 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58)
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89)) X86_32.enc(base.copy_special, *r.copysp(0x89))
# Adjust SP Imm # Adjust SP down by a dynamic value (or up, with a negative operand).
X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83)) X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81)) X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1)) # Adjust SP up by an immediate (or down, with a negative immediate)
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
# Adjust SP down by an immediate (or up, with a negative immediate)
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))
# #
# Float loads and stores. # Float loads and stores.
@@ -406,11 +416,11 @@ X86_64.enc(base.trapff, r.trapff, 0)
# Comparisons # Comparisons
# #
enc_i32_i64(base.icmp, r.icscc, 0x39) enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7) enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7) enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39) enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7) enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7) enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). # TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39)) X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))

View File

@@ -480,8 +480,8 @@ mulx = TailRecipe(
''') ''')
# XX /n ib with 8-bit immediate sign-extended. # XX /n ib with 8-bit immediate sign-extended.
rib = TailRecipe( r_ib = TailRecipe(
'rib', BinaryImm, size=2, ins=GPR, outs=0, 'r_ib', BinaryImm, size=2, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 8), instp=IsSignedInt(BinaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -491,8 +491,8 @@ rib = TailRecipe(
''') ''')
# XX /n id with 32-bit immediate sign-extended. # XX /n id with 32-bit immediate sign-extended.
rid = TailRecipe( r_id = TailRecipe(
'rid', BinaryImm, size=5, ins=GPR, outs=0, 'r_id', BinaryImm, size=5, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 32), instp=IsSignedInt(BinaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -502,8 +502,8 @@ rid = TailRecipe(
''') ''')
# XX /n id with 32-bit immediate sign-extended. UnaryImm version. # XX /n id with 32-bit immediate sign-extended. UnaryImm version.
uid = TailRecipe( u_id = TailRecipe(
'uid', UnaryImm, size=5, ins=(), outs=GPR, 'u_id', UnaryImm, size=5, ins=(), outs=GPR,
instp=IsSignedInt(UnaryImm.imm, 32), instp=IsSignedInt(UnaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(out_reg0), sink); PUT_OP(bits, rex1(out_reg0), sink);
@@ -513,8 +513,8 @@ uid = TailRecipe(
''') ''')
# XX+rd id unary with 32-bit immediate. Note no recipe predicate. # XX+rd id unary with 32-bit immediate. Note no recipe predicate.
puid = TailRecipe( pu_id = TailRecipe(
'puid', UnaryImm, size=4, ins=(), outs=GPR, 'pu_id', UnaryImm, size=4, ins=(), outs=GPR,
emit=''' emit='''
// The destination register is encoded in the low bits of the opcode. // The destination register is encoded in the low bits of the opcode.
// No ModR/M. // No ModR/M.
@@ -524,8 +524,8 @@ puid = TailRecipe(
''') ''')
# XX+rd id unary with bool immediate. Note no recipe predicate. # XX+rd id unary with bool immediate. Note no recipe predicate.
puid_bool = TailRecipe( pu_id_bool = TailRecipe(
'puid_bool', UnaryBool, size=4, ins=(), outs=GPR, 'pu_id_bool', UnaryBool, size=4, ins=(), outs=GPR,
emit=''' emit='''
// The destination register is encoded in the low bits of the opcode. // The destination register is encoded in the low bits of the opcode.
// No ModR/M. // No ModR/M.
@@ -535,8 +535,8 @@ puid_bool = TailRecipe(
''') ''')
# XX+rd iq unary with 64-bit immediate. # XX+rd iq unary with 64-bit immediate.
puiq = TailRecipe( pu_iq = TailRecipe(
'puiq', UnaryImm, size=8, ins=(), outs=GPR, 'pu_iq', UnaryImm, size=8, ins=(), outs=GPR,
emit=''' emit='''
PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
let imm: i64 = imm.into(); let imm: i64 = imm.into();
@@ -564,8 +564,15 @@ copysp = TailRecipe(
modrm_rr(dst, src, sink); modrm_rr(dst, src, sink);
''') ''')
adjustsp8 = TailRecipe( adjustsp = TailRecipe(
'adjustsp8', UnaryImm, size=2, ins=(), outs=(), 'adjustsp', Unary, size=1, ins=(GPR), outs=(),
emit='''
PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink);
modrm_rr(RU::rsp.into(), in_reg0, sink);
''')
adjustsp_ib = TailRecipe(
'adjustsp_ib', UnaryImm, size=2, ins=(), outs=(),
instp=IsSignedInt(UnaryImm.imm, 8), instp=IsSignedInt(UnaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(RU::rsp.into()), sink); PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -574,8 +581,8 @@ adjustsp8 = TailRecipe(
sink.put1(imm as u8); sink.put1(imm as u8);
''') ''')
adjustsp32 = TailRecipe( adjustsp_id = TailRecipe(
'adjustsp32', UnaryImm, size=5, ins=(), outs=(), 'adjustsp_id', UnaryImm, size=5, ins=(), outs=(),
instp=IsSignedInt(UnaryImm.imm, 32), instp=IsSignedInt(UnaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(RU::rsp.into()), sink); PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -1217,8 +1224,8 @@ fcmp = TailRecipe(
''') ''')
# XX /n, MI form with imm8. # XX /n, MI form with imm8.
rcmpib = TailRecipe( rcmp_ib = TailRecipe(
'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags, 'rcmp_ib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 8), instp=IsSignedInt(BinaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -1228,8 +1235,8 @@ rcmpib = TailRecipe(
''') ''')
# XX /n, MI form with imm32. # XX /n, MI form with imm32.
rcmpid = TailRecipe( rcmp_id = TailRecipe(
'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags, 'rcmp_id', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 32), instp=IsSignedInt(BinaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -1401,8 +1408,8 @@ icscc = TailRecipe(
modrm_rr(out_reg0, 0, sink); modrm_rr(out_reg0, 0, sink);
''') ''')
icsccib = TailRecipe( icscc_ib = TailRecipe(
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD, 'icscc_ib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 8), instp=IsSignedInt(IntCompareImm.imm, 8),
emit=''' emit='''
// Comparison instruction. // Comparison instruction.
@@ -1429,8 +1436,8 @@ icsccib = TailRecipe(
modrm_rr(out_reg0, 0, sink); modrm_rr(out_reg0, 0, sink);
''') ''')
icsccid = TailRecipe( icscc_id = TailRecipe(
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD, 'icscc_id', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 32), instp=IsSignedInt(IntCompareImm.imm, 32),
emit=''' emit='''
// Comparison instruction. // Comparison instruction.

View File

@@ -1,6 +1,9 @@
//! Naming well-known routines in the runtime library. //! Naming well-known routines in the runtime library.
use ir::{types, Opcode, Type}; use ir::{types, Opcode, Type, Inst, Function, FuncRef, ExternalName, Signature, AbiParam,
ExtFuncData, ArgumentPurpose};
use settings::CallConv;
use isa::{TargetIsa, RegUnit};
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
@@ -14,6 +17,9 @@ use std::str::FromStr;
/// This list is likely to grow over time. /// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall { pub enum LibCall {
/// probe for stack overflow. These are emitted for functions which need
/// when the `probestack_enabled` setting is true.
Probestack,
/// ceil.f32 /// ceil.f32
CeilF32, CeilF32,
/// ceil.f64 /// ceil.f64
@@ -32,7 +38,8 @@ pub enum LibCall {
NearestF64, NearestF64,
} }
const NAME: [&str; 8] = [ const NAME: [&str; 9] = [
"Probestack",
"CeilF32", "CeilF32",
"CeilF64", "CeilF64",
"FloorF32", "FloorF32",
@@ -54,6 +61,7 @@ impl FromStr for LibCall {
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
match s { match s {
"Probestack" => Ok(LibCall::Probestack),
"CeilF32" => Ok(LibCall::CeilF32), "CeilF32" => Ok(LibCall::CeilF32),
"CeilF64" => Ok(LibCall::CeilF64), "CeilF64" => Ok(LibCall::CeilF64),
"FloorF32" => Ok(LibCall::FloorF32), "FloorF32" => Ok(LibCall::FloorF32),
@@ -97,6 +105,96 @@ impl LibCall {
} }
} }
/// Get a function reference for `libcall` in `func`, following the signature
/// for `inst`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_libcall_funcref(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa))
}
/// Get a function reference for the probestack function in `func`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_probestack_funcref(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(LibCall::Probestack, func).unwrap_or_else(|| {
make_funcref_for_probestack(func, reg_type, arg_reg, isa)
})
}
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `LibCall::Probestack`.
fn make_funcref_for_probestack(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(CallConv::Probestack);
let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
sig.params.push(rax);
if !isa.flags().probestack_func_adjusts_sp() {
sig.returns.push(rax);
}
make_funcref(LibCall::Probestack, func, sig, isa)
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref_for_inst(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
// Start with a fast calling convention. We'll give the ISA a chance to change it.
let mut sig = Signature::new(isa.flags().call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
}
make_funcref(libcall, func, sig, isa)
}
/// Create a funcref for `libcall`.
fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef {
let sigref = func.import_signature(sig);
func.import_function(ExtFuncData {
name: ExternalName::LibCall(libcall),
signature: sigref,
colocated: isa.flags().colocated_libcalls(),
})
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View File

@@ -33,7 +33,7 @@ pub use ir::heap::{HeapBase, HeapData, HeapStyle};
pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs}; pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
pub use ir::jumptable::JumpTableData; pub use ir::jumptable::JumpTableData;
pub use ir::layout::Layout; pub use ir::layout::Layout;
pub use ir::libcall::LibCall; pub use ir::libcall::{LibCall, get_libcall_funcref, get_probestack_funcref};
pub use ir::memflags::MemFlags; pub use ir::memflags::MemFlags;
pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use ir::sourceloc::SourceLoc; pub use ir::sourceloc::SourceLoc;

View File

@@ -6,7 +6,8 @@ use cursor::{Cursor, CursorPosition, EncCursor};
use ir; use ir;
use ir::immediates::Imm64; use ir::immediates::Imm64;
use ir::stackslot::{StackOffset, StackSize}; use ir::stackslot::{StackOffset, StackSize};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc}; use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc,
get_probestack_funcref};
use isa::{RegClass, RegUnit, TargetIsa}; use isa::{RegClass, RegUnit, TargetIsa};
use regalloc::RegisterSet; use regalloc::RegisterSet;
use result; use result;
@@ -216,10 +217,16 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::Ct
} }
CallConv::Fastcall => unimplemented!("Windows calling conventions"), CallConv::Fastcall => unimplemented!("Windows calling conventions"),
CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa), CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
CallConv::Probestack => unimplemented!("probestack calling convention"),
} }
} }
pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult { pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
debug_assert!(
!isa.flags().probestack_enabled(),
"baldrdash does not expect cretonne to emit stack probes"
);
// Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes. // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
let stack_align = 16; let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
@@ -239,7 +246,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// newer versions use a 16-byte aligned stack pointer. // newer versions use a 16-byte aligned stack pointer.
let stack_align = 16; let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
let csr_type = if isa.flags().is_64bit() { let reg_type = if isa.flags().is_64bit() {
ir::types::I64 ir::types::I64
} else { } else {
ir::types::I32 ir::types::I32
@@ -266,7 +273,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// Add CSRs to function signature // Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg( let fp_arg = ir::AbiParam::special_reg(
csr_type, reg_type,
ir::ArgumentPurpose::FramePointer, ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit, RU::rbp as RegUnit,
); );
@@ -274,7 +281,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
func.signature.returns.push(fp_arg); func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) { for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr); let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg); func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg); func.signature.returns.push(csr_arg);
} }
@@ -282,11 +289,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// Set up the cursor and insert the prologue // Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block"); let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb); let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs); insert_system_v_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue // Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere); let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs); insert_system_v_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(()) Ok(())
} }
@@ -295,12 +302,13 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
fn insert_system_v_prologue( fn insert_system_v_prologue(
pos: &mut EncCursor, pos: &mut EncCursor,
stack_size: i64, stack_size: i64,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
isa: &TargetIsa,
) { ) {
// Append param to entry EBB // Append param to entry EBB
let ebb = pos.current_ebb().expect("missing ebb under cursor"); let ebb = pos.current_ebb().expect("missing ebb under cursor");
let fp = pos.func.dfg.append_ebb_param(ebb, csr_type); let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.ins().x86_push(fp); pos.ins().x86_push(fp);
@@ -311,7 +319,7 @@ fn insert_system_v_prologue(
for reg in csrs.iter(GPR) { for reg in csrs.iter(GPR) {
// Append param to entry EBB // Append param to entry EBB
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type); let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);
// Assign it a location // Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
@@ -320,8 +328,48 @@ fn insert_system_v_prologue(
pos.ins().x86_push(csr_arg); pos.ins().x86_push(csr_arg);
} }
// Allocate stack frame storage.
if stack_size > 0 { if stack_size > 0 {
pos.ins().adjust_sp_imm(Imm64::new(-stack_size)); if isa.flags().probestack_enabled() &&
stack_size > (1 << isa.flags().probestack_size_log2())
{
// Emit a stack probe.
let rax = RU::rax as RegUnit;
let rax_val = ir::ValueLoc::Reg(rax);
// The probestack function expects its input in %rax.
let arg = pos.ins().iconst(reg_type, stack_size);
pos.func.locations[arg] = rax_val;
// Call the probestack function.
let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
// Make the call.
let call = if !isa.flags().is_pic() && isa.flags().is_64bit() &&
!pos.func.dfg.ext_funcs[callee].colocated
{
// 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
// Use r11 as it may be clobbered under all supported calling conventions.
let r11 = RU::r11 as RegUnit;
let sig = pos.func.dfg.ext_funcs[callee].signature;
let addr = pos.ins().func_addr(reg_type, callee);
pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
pos.ins().call_indirect(sig, addr, &[arg])
} else {
// Otherwise just do a normal call.
pos.ins().call(callee, &[arg])
};
// If the probestack function doesn't adjust sp, do it ourselves.
if !isa.flags().probestack_func_adjusts_sp() {
let result = pos.func.dfg.inst_results(call)[0];
pos.func.locations[result] = rax_val;
pos.ins().adjust_sp_down(result);
}
} else {
// Simply decrement the stack pointer.
pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
}
} }
} }
@@ -329,14 +377,14 @@ fn insert_system_v_prologue(
fn insert_system_v_epilogues( fn insert_system_v_epilogues(
pos: &mut EncCursor, pos: &mut EncCursor,
stack_size: i64, stack_size: i64,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
) { ) {
while let Some(ebb) = pos.next_ebb() { while let Some(ebb) = pos.next_ebb() {
pos.goto_last_inst(ebb); pos.goto_last_inst(ebb);
if let Some(inst) = pos.current_inst() { if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() { if pos.func.dfg[inst].opcode().is_return() {
insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs); insert_system_v_epilogue(inst, stack_size, pos, reg_type, csrs);
} }
} }
} }
@@ -347,23 +395,23 @@ fn insert_system_v_epilogue(
inst: ir::Inst, inst: ir::Inst,
stack_size: i64, stack_size: i64,
pos: &mut EncCursor, pos: &mut EncCursor,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
) { ) {
if stack_size > 0 { if stack_size > 0 {
pos.ins().adjust_sp_imm(Imm64::new(stack_size)); pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
} }
// Pop all the callee-saved registers, stepping backward each time to // Pop all the callee-saved registers, stepping backward each time to
// preserve the correct order. // preserve the correct order.
let fp_ret = pos.ins().x86_pop(csr_type); let fp_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst(); pos.prev_inst();
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret); pos.func.dfg.append_inst_arg(inst, fp_ret);
for reg in csrs.iter(GPR) { for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(csr_type); let csr_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst(); pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg); pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);

View File

@@ -1,7 +1,7 @@
//! Expanding instructions as runtime library calls. //! Expanding instructions as runtime library calls.
use ir; use ir;
use ir::InstBuilder; use ir::{InstBuilder, get_libcall_funcref};
use std::vec::Vec; use std::vec::Vec;
use isa::TargetIsa; use isa::TargetIsa;
@@ -14,58 +14,14 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIs
None => return false, None => return false,
}; };
let funcref =
find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func, isa));
// Now we convert `inst` to a call. First save the arguments. // Now we convert `inst` to a call. First save the arguments.
let mut args = Vec::new(); let mut args = Vec::new();
args.extend_from_slice(func.dfg.inst_args(inst)); args.extend_from_slice(func.dfg.inst_args(inst));
// The replace builder will preserve the instruction result values. // The replace builder will preserve the instruction result values.
let funcref = get_libcall_funcref(libcall, func, inst, isa);
func.dfg.replace(inst).call(funcref, &args); func.dfg.replace(inst).call(funcref, &args);
// TODO: ask the ISA to legalize the signature. // TODO: ask the ISA to legalize the signature.
true true
} }
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ir::ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref(
libcall: ir::LibCall,
inst: ir::Inst,
func: &mut ir::Function,
isa: &TargetIsa,
) -> ir::FuncRef {
// Start with a fast calling convention. We'll give the ISA a chance to change it.
let mut sig = ir::Signature::new(isa.flags().call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v)));
}
let sigref = func.import_signature(sig);
// TODO: Can libcalls be colocated in some circumstances?
func.import_function(ir::ExtFuncData {
name: ir::ExternalName::LibCall(libcall),
signature: sigref,
colocated: false,
})
}

View File

@@ -363,6 +363,7 @@ mod tests {
is_64bit = false\n\ is_64bit = false\n\
call_conv = \"fast\"\n\ call_conv = \"fast\"\n\
is_pic = false\n\ is_pic = false\n\
colocated_libcalls = false\n\
return_at_end = false\n\ return_at_end = false\n\
avoid_div_traps = false\n\ avoid_div_traps = false\n\
is_compressed = false\n\ is_compressed = false\n\
@@ -370,7 +371,10 @@ mod tests {
enable_simd = true\n\ enable_simd = true\n\
enable_atomics = true\n\ enable_atomics = true\n\
baldrdash_prologue_words = 0\n\ baldrdash_prologue_words = 0\n\
allones_funcaddrs = false\n" allones_funcaddrs = false\n\
probestack_enabled = true\n\
probestack_func_adjusts_sp = false\n\
probestack_size_log2 = 12\n"
); );
assert_eq!(f.opt_level(), super::OptLevel::Default); assert_eq!(f.opt_level(), super::OptLevel::Default);
assert_eq!(f.enable_simd(), true); assert_eq!(f.enable_simd(), true);