Stack overflow checking with stack probes.

This adds a libcall name, a calling convention, and settings for
emitting stack probes, and implements them for x86 system_v ABIs.
This commit is contained in:
Dan Gohman
2018-04-20 21:41:45 -07:00
parent c5b15c2396
commit 3b1d805758
20 changed files with 585 additions and 155 deletions

View File

@@ -392,19 +392,37 @@ ebb0:
; asm: popl %ecx ; asm: popl %ecx
[-,%rcx] v512 = x86_pop.i32 ; bin: 59 [-,%rcx] v512 = x86_pop.i32 ; bin: 59
; Adjust Stack Pointer ; Adjust Stack Pointer Up
; asm: addl $64, %esp ; asm: addl $64, %esp
adjust_sp_imm 64 ; bin: 83 c4 40 adjust_sp_up_imm 64 ; bin: 83 c4 40
; asm: addl $-64, %esp ; asm: addl $-64, %esp
adjust_sp_imm -64 ; bin: 83 c4 c0 adjust_sp_up_imm -64 ; bin: 83 c4 c0
; asm: addl $1024, %esp ; asm: addl $1024, %esp
adjust_sp_imm 1024 ; bin: 81 c4 00000400 adjust_sp_up_imm 1024 ; bin: 81 c4 00000400
; asm: addl $-1024, %esp ; asm: addl $-1024, %esp
adjust_sp_imm -1024 ; bin: 81 c4 fffffc00 adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00
; asm: addl $2147483647, %esp ; asm: addl $2147483647, %esp
adjust_sp_imm 2147483647 ; bin: 81 c4 7fffffff adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff
; asm: addl $-2147483648, %esp ; asm: addl $-2147483648, %esp
adjust_sp_imm -2147483648 ; bin: 81 c4 80000000 adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000
; Adjust Stack Pointer Down
; asm: subl %ecx, %esp
adjust_sp_down v1 ; bin: 29 cc
; asm: subl %esi, %esp
adjust_sp_down v2 ; bin: 29 f4
; asm: subl $64, %esp
adjust_sp_down_imm 64 ; bin: 83 ec 40
; asm: subl $-64, %esp
adjust_sp_down_imm -64 ; bin: 83 ec c0
; asm: subl $1024, %esp
adjust_sp_down_imm 1024 ; bin: 81 ec 00000400
; asm: subl $-1024, %esp
adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00
; asm: subl $2147483647, %esp
adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff
; asm: subl $-2147483648, %esp
adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000
; Shift immediates ; Shift immediates
; asm: shll $2, %esi ; asm: shll $2, %esi

View File

@@ -547,19 +547,37 @@ ebb0:
; asm: popq %r10 ; asm: popq %r10
[-,%r10] v514 = x86_pop.i64 ; bin: 41 5a [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a
; Adjust Stack Pointer ; Adjust Stack Pointer Up
; asm: addq $64, %rsp ; asm: addq $64, %rsp
adjust_sp_imm 64 ; bin: 48 83 c4 40 adjust_sp_up_imm 64 ; bin: 48 83 c4 40
; asm: addq $-64, %rsp ; asm: addq $-64, %rsp
adjust_sp_imm -64 ; bin: 48 83 c4 c0 adjust_sp_up_imm -64 ; bin: 48 83 c4 c0
; asm: addq $1024, %rsp ; asm: addq $1024, %rsp
adjust_sp_imm 1024 ; bin: 48 81 c4 00000400 adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400
; asm: addq $-1024, %rsp ; asm: addq $-1024, %rsp
adjust_sp_imm -1024 ; bin: 48 81 c4 fffffc00 adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00
; asm: addq $2147483647, %rsp ; asm: addq $2147483647, %rsp
adjust_sp_imm 2147483647 ; bin: 48 81 c4 7fffffff adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff
; asm: addq $-2147483648, %rsp ; asm: addq $-2147483648, %rsp
adjust_sp_imm -2147483648 ; bin: 48 81 c4 80000000 adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000
; Adjust Stack Pointer Down
; asm: subq %rcx, %rsp
adjust_sp_down v1 ; bin: 48 29 cc
; asm: subq %r10, %rsp
adjust_sp_down v3 ; bin: 4c 29 d4
; asm: subq $64, %rsp
adjust_sp_down_imm 64 ; bin: 48 83 ec 40
; asm: subq $-64, %rsp
adjust_sp_down_imm -64 ; bin: 48 83 ec c0
; asm: subq $1024, %rsp
adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400
; asm: subq $-1024, %rsp
adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00
; asm: subq $2147483647, %rsp
adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff
; asm: subq $-2147483648, %rsp
adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000
; Shift immediates ; Shift immediates
; asm: shlq $12, %rsi ; asm: shlq $12, %rsi

View File

@@ -0,0 +1,29 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_func_adjusts_sp=1
isa x86
; Like %big in probestack.cton, but with the probestack function adjusting
; the stack pointer itself.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0
; nextln: [Op1call_id#e8] call fn0(v1)
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64
; nextln: [Op1ret#c3] return v2
; nextln: }

View File

@@ -0,0 +1,25 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_enabled=0
isa x86
; Like %big in probestack.cton, but with probes disabled.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }

View File

@@ -0,0 +1,28 @@
test compile
set is_64bit=1
isa x86
; Like %big in probestack.cton, but without a colocated libcall.
function %big() system_v {
ss0 = explicit_slot 300000
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 300000, offset -300016
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0
; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0
; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0
; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64
; nextln: [Op1ret#c3] return v4
; nextln: }

View File

@@ -0,0 +1,75 @@
test compile
set is_64bit=1
set colocated_libcalls=1
set probestack_size_log2=13
isa x86
; Like %big in probestack.cton, but now the probestack size is bigger
; and it no longer needs a probe.
function %big() system_v {
ss0 = explicit_slot 4097
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4097, offset -4113
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }
; Like %big; still doesn't need a probe.
function %bigger() system_v {
ss0 = explicit_slot 8192
ebb0:
return
}
; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 8192, offset -8208
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }
; Like %bigger; this needs a probe.
function %biggest() system_v {
ss0 = explicit_slot 8193
ebb0:
return
}
; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 8193, offset -8209
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208
; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208
; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64
; nextln: [Op1ret#c3] return v3
; nextln: }

View File

@@ -0,0 +1,50 @@
test compile
set is_64bit=1
set colocated_libcalls=1
isa x86
; A function with a big stack frame. This should have a stack probe.
function %big() system_v {
ss0 = explicit_slot 4097
ebb0:
return
}
; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4097, offset -4113
; nextln: ss1 = incoming_arg 16, offset -16
; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack
; nextln: fn0 = colocated %Probestack sig0
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112
; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1)
; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112
; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64
; nextln: [Op1ret#c3] return v3
; nextln: }
; A function with a small enough stack frame. This shouldn't have a stack probe.
function %small() system_v {
ss0 = explicit_slot 4096
ebb0:
return
}
; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 4096, offset -4112
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: [RexOp1pushq#50] x86_push v0
; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp
; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096
; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096
; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64
; nextln: [Op1ret#c3] return v1
; nextln: }

View File

@@ -36,8 +36,8 @@ ebb0:
; nextln: ebb0(v0: i64 [%rbp]): ; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0 ; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp ; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_imm -176 ; nextln: adjust_sp_down_imm 176
; nextln: adjust_sp_imm 176 ; nextln: adjust_sp_up_imm 176
; nextln: v1 = x86_pop.i64 ; nextln: v1 = x86_pop.i64
; nextln: return v1 ; nextln: return v1
; nextln: } ; nextln: }
@@ -109,7 +109,7 @@ ebb0(v0: i64, v1: i64):
; nextln: x86_push v18 ; nextln: x86_push v18
; nextln: x86_push v19 ; nextln: x86_push v19
; nextln: x86_push v20 ; nextln: x86_push v20
; nextln: adjust_sp_imm -8 ; nextln: adjust_sp_down_imm 8
; nextln: v2 = load.i32 v0 ; nextln: v2 = load.i32 v0
; nextln: v3 = load.i32 v0+8 ; nextln: v3 = load.i32 v0+8
; nextln: v4 = load.i32 v0+16 ; nextln: v4 = load.i32 v0+16
@@ -136,7 +136,7 @@ ebb0(v0: i64, v1: i64):
; nextln: store v12, v1+80 ; nextln: store v12, v1+80
; nextln: store v13, v1+88 ; nextln: store v13, v1+88
; nextln: store v14, v1+96 ; nextln: store v14, v1+96
; nextln: adjust_sp_imm 8 ; nextln: adjust_sp_up_imm 8
; nextln: v26 = x86_pop.i64 ; nextln: v26 = x86_pop.i64
; nextln: v25 = x86_pop.i64 ; nextln: v25 = x86_pop.i64
; nextln: v24 = x86_pop.i64 ; nextln: v24 = x86_pop.i64
@@ -192,13 +192,13 @@ ebb0(v0: i64, v1: i64):
; nextln: x86_push v51 ; nextln: x86_push v51
; nextln: x86_push v52 ; nextln: x86_push v52
; nextln: x86_push v53 ; nextln: x86_push v53
; nextln: adjust_sp_imm ; nextln: adjust_sp_down_imm
; check: spill ; check: spill
; check: fill ; check: fill
; check: adjust_sp_imm ; check: adjust_sp_up_imm
; nextln: v59 = x86_pop.i64 ; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64 ; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64 ; nextln: v57 = x86_pop.i64

View File

@@ -10,7 +10,7 @@ ebb0(v0: i32, v1: i32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp ; sameln: function %br_icmp
@@ -34,7 +34,7 @@ ebb0(v0: i32, v1: i32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp_inverse ; sameln: function %br_icmp_inverse
@@ -53,12 +53,12 @@ ebb1:
function %br_icmp_imm(i32, i32) -> i32 { function %br_icmp_imm(i32, i32) -> i32 {
ebb0(v0: i32, v1: i32): ebb0(v0: i32, v1: i32):
[Op1icsccib#7083] v2 = icmp_imm slt v0, 2 [Op1icscc_ib#7083] v2 = icmp_imm slt v0, 2
[Op1t8jccd_long#84] brz v2, ebb1 [Op1t8jccd_long#84] brz v2, ebb1
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v8 = iconst.i32 3 [Op1pu_id#b8,%rax] v8 = iconst.i32 3
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }
; sameln: function %br_icmp_imm ; sameln: function %br_icmp_imm
@@ -82,7 +82,7 @@ ebb0(v0: f32, v1: f32):
[Op1ret#c3] return v1 [Op1ret#c3] return v1
ebb1: ebb1:
[Op1puid#b8,%rax] v18 = iconst.i32 0x40a8_0000 [Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000
[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 [Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
[Op1ret#c3] return v8 [Op1ret#c3] return v8
} }

View File

@@ -7,8 +7,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000 heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000
ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64):
@0001 [RexOp1puid#b8] v5 = iconst.i32 0 @0001 [RexOp1pu_id#b8] v5 = iconst.i32 0
@0003 [RexOp1puid#b8] v6 = iconst.i32 0 @0003 [RexOp1pu_id#b8] v6 = iconst.i32 0
@0005 [RexOp1tjccb#74] brz v6, ebb10 @0005 [RexOp1tjccb#74] brz v6, ebb10
@0007 [RexOp1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) @0007 [RexOp1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3)
@@ -16,10 +16,10 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@000b [RexOp1jmpb#eb] jump ebb6 @000b [RexOp1jmpb#eb] jump ebb6
ebb6: ebb6:
@000d [RexOp1puid#b8] v8 = iconst.i32 0 @000d [RexOp1pu_id#b8] v8 = iconst.i32 0
@000f [RexOp1tjccb#75] brnz v8, ebb5 @000f [RexOp1tjccb#75] brnz v8, ebb5
@0011 [RexOp1puid#b8] v9 = iconst.i32 0 @0011 [RexOp1pu_id#b8] v9 = iconst.i32 0
@0015 [RexOp1puid#b8] v11 = iconst.i32 0 @0015 [RexOp1pu_id#b8] v11 = iconst.i32 0
@0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11 @0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11
@0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12 @0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12
@001a [RexOp1rr#21] v14 = band v9, v13 @001a [RexOp1rr#21] v14 = band v9, v13
@@ -28,11 +28,11 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
ebb7: ebb7:
@0020 [RexOp1tjccb#74] brz.i32 v17, ebb8 @0020 [RexOp1tjccb#74] brz.i32 v17, ebb8
@0022 [RexOp1puid#b8] v18 = iconst.i32 0 @0022 [RexOp1pu_id#b8] v18 = iconst.i32 0
@0024 [RexOp1tjccb#74] brz v18, ebb9 @0024 [RexOp1tjccb#74] brz v18, ebb9
@0028 [RexOp1puid#b8] v21 = iconst.i32 0 @0028 [RexOp1pu_id#b8] v21 = iconst.i32 0
@002a [RexOp1umr#89] v79 = uextend.i64 v5 @002a [RexOp1umr#89] v79 = uextend.i64 v5
@002a [RexOp1rib#8083] v80 = iadd_imm.i64 v4, 0 @002a [RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0
@002a [RexOp1ld#808b] v81 = load.i64 v80 @002a [RexOp1ld#808b] v81 = load.i64 v80
@002a [RexOp1rr#8001] v22 = iadd v81, v79 @002a [RexOp1rr#8001] v22 = iadd v81, v79
@002a [RexMp1st#189] istore16 v21, v22 @002a [RexMp1st#189] istore16 v21, v22
@@ -42,8 +42,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@002e [RexOp1jmpb#eb] jump ebb8 @002e [RexOp1jmpb#eb] jump ebb8
ebb8: ebb8:
@0033 [RexOp1puid#b8] v27 = iconst.i32 3 @0033 [RexOp1pu_id#b8] v27 = iconst.i32 3
@0035 [RexOp1puid#b8] v28 = iconst.i32 4 @0035 [RexOp1pu_id#b8] v28 = iconst.i32 4
@003b [RexOp1rr#09] v35 = bor.i32 v31, v13 @003b [RexOp1rr#09] v35 = bor.i32 v31, v13
@003c [RexOp1tjccb#75] brnz v35, ebb15(v27) @003c [RexOp1tjccb#75] brnz v35, ebb15(v27)
@003c [RexOp1jmpb#eb] jump ebb15(v28) @003c [RexOp1jmpb#eb] jump ebb15(v28)
@@ -58,25 +58,25 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
@0045 [RexOp1jmpb#eb] jump ebb2(v40, v47, v54, v61, v68, v75) @0045 [RexOp1jmpb#eb] jump ebb2(v40, v47, v54, v61, v68, v75)
ebb10: ebb10:
@0046 [RexOp1puid#b8] v43 = iconst.i32 0 @0046 [RexOp1pu_id#b8] v43 = iconst.i32 0
@0048 [RexOp1jmpb#eb] jump ebb2(v43, v5, v0, v1, v2, v3) @0048 [RexOp1jmpb#eb] jump ebb2(v43, v5, v0, v1, v2, v3)
ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32):
@004c [RexOp1puid#b8] v44 = iconst.i32 0 @004c [RexOp1pu_id#b8] v44 = iconst.i32 0
@004e [RexOp1tjccb#74] brz v44, ebb12 @004e [RexOp1tjccb#74] brz v44, ebb12
@0052 [RexOp1puid#b8] v50 = iconst.i32 11 @0052 [RexOp1pu_id#b8] v50 = iconst.i32 11
@0054 [RexOp1tjccb#74] brz v50, ebb14 @0054 [RexOp1tjccb#74] brz v50, ebb14
@0058 [RexOp1umr#89] v82 = uextend.i64 v52 @0058 [RexOp1umr#89] v82 = uextend.i64 v52
@0058 [RexOp1rib#8083] v83 = iadd_imm.i64 v4, 0 @0058 [RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0
@0058 [RexOp1ld#808b] v84 = load.i64 v83 @0058 [RexOp1ld#808b] v84 = load.i64 v83
@0058 [RexOp1rr#8001] v57 = iadd v84, v82 @0058 [RexOp1rr#8001] v57 = iadd v84, v82
@0058 [RexOp1ld#8b] v58 = load.i32 v57 @0058 [RexOp1ld#8b] v58 = load.i32 v57
@005d [RexOp1umr#89] v85 = uextend.i64 v58 @005d [RexOp1umr#89] v85 = uextend.i64 v58
@005d [RexOp1rib#8083] v86 = iadd_imm.i64 v4, 0 @005d [RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0
@005d [RexOp1ld#808b] v87 = load.i64 v86 @005d [RexOp1ld#808b] v87 = load.i64 v86
@005d [RexOp1rr#8001] v64 = iadd v87, v85 @005d [RexOp1rr#8001] v64 = iadd v87, v85
@005d [RexOp1st#88] istore8 v59, v64 @005d [RexOp1st#88] istore8 v59, v64
@0060 [RexOp1puid#b8] v65 = iconst.i32 0 @0060 [RexOp1pu_id#b8] v65 = iconst.i32 0
@0062 [RexOp1jmpb#eb] jump ebb13(v65) @0062 [RexOp1jmpb#eb] jump ebb13(v65)
ebb14: ebb14:
@@ -84,7 +84,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8])
ebb13(v51: i32): ebb13(v51: i32):
@0066 [RexOp1umr#89] v88 = uextend.i64 v45 @0066 [RexOp1umr#89] v88 = uextend.i64 v45
@0066 [RexOp1rib#8083] v89 = iadd_imm.i64 v4, 0 @0066 [RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0
@0066 [RexOp1ld#808b] v90 = load.i64 v89 @0066 [RexOp1ld#808b] v90 = load.i64 v89
@0066 [RexOp1rr#8001] v71 = iadd v90, v88 @0066 [RexOp1rr#8001] v71 = iadd v90, v88
@0066 [RexOp1st#89] store v51, v71 @0066 [RexOp1st#89] store v51, v71

View File

@@ -2,6 +2,7 @@
test compile test compile
set is_64bit set is_64bit
set probestack_enabled=0
isa x86 haswell isa x86 haswell
; This function contains unreachable blocks which trip up the register ; This function contains unreachable blocks which trip up the register

View File

@@ -591,12 +591,25 @@ stack_check = Instruction(
The global variable must be accessible and naturally aligned for a The global variable must be accessible and naturally aligned for a
pointer-sized value. pointer-sized value.
`stack_check` is an alternative way to detect stack overflow, when using
a calling convention that doesn't perform stack probes.
""", """,
ins=GV, can_trap=True) ins=GV, can_trap=True)
delta = Operand('delta', Int)
adjust_sp_down = Instruction(
'adjust_sp_down', r"""
Subtracts ``delta`` offset value from the stack pointer register.
This instruction is used to adjust the stack pointer by a dynamic amount.
""",
ins=(delta,),
other_side_effects=True)
StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
adjust_sp_imm = Instruction( adjust_sp_up_imm = Instruction(
'adjust_sp_imm', r""" 'adjust_sp_up_imm', r"""
Adds ``Offset`` immediate offset value to the stack pointer register. Adds ``Offset`` immediate offset value to the stack pointer register.
This instruction is used to adjust the stack pointer, primarily in function This instruction is used to adjust the stack pointer, primarily in function
@@ -606,6 +619,19 @@ adjust_sp_imm = Instruction(
ins=(StackOffset,), ins=(StackOffset,),
other_side_effects=True) other_side_effects=True)
StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
adjust_sp_down_imm = Instruction(
'adjust_sp_down_imm', r"""
Subtracts ``Offset`` immediate offset value from the stack pointer
register.
This instruction is used to adjust the stack pointer, primarily in function
prologues and epilogues. ``Offset`` is constrained to the size of a signed
32-bit integer.
""",
ins=(StackOffset,),
other_side_effects=True)
f = Operand('f', iflags) f = Operand('f', iflags)
ifcmp_sp = Instruction( ifcmp_sp = Instruction(

View File

@@ -38,17 +38,27 @@ call_conv = EnumSetting(
- system_v: System V-style convention used on many platforms - system_v: System V-style convention used on many platforms
- fastcall: Windows "fastcall" convention, also used for x64 and ARM - fastcall: Windows "fastcall" convention, also used for x64 and ARM
- baldrdash: SpiderMonkey WebAssembly convention - baldrdash: SpiderMonkey WebAssembly convention
- probestack: specialized convention for the probestack function
The default calling convention may be overridden by individual The default calling convention may be overridden by individual
functions. functions.
""", """,
'fast', 'cold', 'system_v', 'fastcall', 'baldrdash') 'fast', 'cold', 'system_v', 'fastcall', 'baldrdash', 'probestack')
# Note that Cretonne doesn't currently need an is_pie flag, because PIE is just # Note that Cretonne doesn't currently need an is_pie flag, because PIE is just
# PIC where symbols can't be pre-empted, which can be expressed with the # PIC where symbols can't be pre-empted, which can be expressed with the
# `colocated` flag on external functions and global variables. # `colocated` flag on external functions and global variables.
is_pic = BoolSetting("Enable Position-Independent Code generation") is_pic = BoolSetting("Enable Position-Independent Code generation")
colocated_libcalls = BoolSetting(
"""
Use colocated libcalls.
Generate code that assumes that libcalls can be declared "colocated",
meaning they will be defined along with the current function, such that
they can use more efficient addressing.
""")
return_at_end = BoolSetting( return_at_end = BoolSetting(
""" """
Generate functions with at most a single return instruction at the Generate functions with at most a single return instruction at the
@@ -115,4 +125,31 @@ allones_funcaddrs = BoolSetting(
Emit not-yet-relocated function addresses as all-ones bit patterns. Emit not-yet-relocated function addresses as all-ones bit patterns.
""") """)
#
# Stack probing options.
#
probestack_enabled = BoolSetting(
"""
Enable the use of stack probes, for calling conventions which support
this functionality.
""",
default=True)
probestack_func_adjusts_sp = BoolSetting(
"""
    Set this to true if the stack probe function modifies the stack pointer
    itself.
""")
probestack_size_log2 = NumSetting(
"""
The log2 of the size of the stack guard region.
Stack frames larger than this size will have stack overflow checked
by calling the probestack function.
The default is 12, which translates to a size of 4096.
""",
default=12)
group.close(globals()) group.close(globals())

View File

@@ -136,29 +136,29 @@ for inst, rrr in [
(base.band_imm, 4), (base.band_imm, 4),
(base.bor_imm, 1), (base.bor_imm, 1),
(base.bxor_imm, 6)]: (base.bxor_imm, 6)]:
enc_i32_i64(inst, r.rib, 0x83, rrr=rrr) enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
enc_i32_i64(inst, r.rid, 0x81, rrr=rrr) enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as # TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks. # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants. # Immediate constants.
X86_32.enc(base.iconst.i32, *r.puid(0xb8)) X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))
X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8)) X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
X86_64.enc(base.iconst.i32, *r.puid(0xb8)) X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits. # The 32-bit immediate movl also zero-extends to 64 bits.
X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8), X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32)) instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.puid(0xb8), X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32)) instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate. # Sign-extended 32-bit immediate.
X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))
# bool constants. # bool constants.
enc_both(base.bconst.b1, r.puid_bool, 0xb8) enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)
# Shifts and rotates. # Shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -180,7 +180,7 @@ for inst, rrr in [
(base.ishl_imm, 4), (base.ishl_imm, 4),
(base.ushr_imm, 5), (base.ushr_imm, 5),
(base.sshr_imm, 7)]: (base.sshr_imm, 7)]:
enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr) enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)
# Population count. # Population count.
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
@@ -254,11 +254,21 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58)
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89)) X86_32.enc(base.copy_special, *r.copysp(0x89))
# Adjust SP Imm # Adjust SP down by a dynamic value (or up, with a negative operand).
X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83)) X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81)) X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1)) # Adjust SP up by an immediate (or down, with a negative immediate)
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
# Adjust SP down by an immediate (or up, with a negative immediate)
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))
# #
# Float loads and stores. # Float loads and stores.
@@ -406,11 +416,11 @@ X86_64.enc(base.trapff, r.trapff, 0)
# Comparisons # Comparisons
# #
enc_i32_i64(base.icmp, r.icscc, 0x39) enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7) enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7) enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39) enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7) enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7) enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). # TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39)) X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))

View File

@@ -480,8 +480,8 @@ mulx = TailRecipe(
''') ''')
# XX /n ib with 8-bit immediate sign-extended. # XX /n ib with 8-bit immediate sign-extended.
rib = TailRecipe( r_ib = TailRecipe(
'rib', BinaryImm, size=2, ins=GPR, outs=0, 'r_ib', BinaryImm, size=2, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 8), instp=IsSignedInt(BinaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -491,8 +491,8 @@ rib = TailRecipe(
''') ''')
# XX /n id with 32-bit immediate sign-extended. # XX /n id with 32-bit immediate sign-extended.
rid = TailRecipe( r_id = TailRecipe(
'rid', BinaryImm, size=5, ins=GPR, outs=0, 'r_id', BinaryImm, size=5, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 32), instp=IsSignedInt(BinaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -502,8 +502,8 @@ rid = TailRecipe(
''') ''')
# XX /n id with 32-bit immediate sign-extended. UnaryImm version. # XX /n id with 32-bit immediate sign-extended. UnaryImm version.
uid = TailRecipe( u_id = TailRecipe(
'uid', UnaryImm, size=5, ins=(), outs=GPR, 'u_id', UnaryImm, size=5, ins=(), outs=GPR,
instp=IsSignedInt(UnaryImm.imm, 32), instp=IsSignedInt(UnaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(out_reg0), sink); PUT_OP(bits, rex1(out_reg0), sink);
@@ -513,8 +513,8 @@ uid = TailRecipe(
''') ''')
# XX+rd id unary with 32-bit immediate. Note no recipe predicate. # XX+rd id unary with 32-bit immediate. Note no recipe predicate.
puid = TailRecipe( pu_id = TailRecipe(
'puid', UnaryImm, size=4, ins=(), outs=GPR, 'pu_id', UnaryImm, size=4, ins=(), outs=GPR,
emit=''' emit='''
// The destination register is encoded in the low bits of the opcode. // The destination register is encoded in the low bits of the opcode.
// No ModR/M. // No ModR/M.
@@ -524,8 +524,8 @@ puid = TailRecipe(
''') ''')
# XX+rd id unary with bool immediate. Note no recipe predicate. # XX+rd id unary with bool immediate. Note no recipe predicate.
puid_bool = TailRecipe( pu_id_bool = TailRecipe(
'puid_bool', UnaryBool, size=4, ins=(), outs=GPR, 'pu_id_bool', UnaryBool, size=4, ins=(), outs=GPR,
emit=''' emit='''
// The destination register is encoded in the low bits of the opcode. // The destination register is encoded in the low bits of the opcode.
// No ModR/M. // No ModR/M.
@@ -535,8 +535,8 @@ puid_bool = TailRecipe(
''') ''')
# XX+rd iq unary with 64-bit immediate. # XX+rd iq unary with 64-bit immediate.
puiq = TailRecipe( pu_iq = TailRecipe(
'puiq', UnaryImm, size=8, ins=(), outs=GPR, 'pu_iq', UnaryImm, size=8, ins=(), outs=GPR,
emit=''' emit='''
PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
let imm: i64 = imm.into(); let imm: i64 = imm.into();
@@ -564,8 +564,15 @@ copysp = TailRecipe(
modrm_rr(dst, src, sink); modrm_rr(dst, src, sink);
''') ''')
adjustsp8 = TailRecipe( adjustsp = TailRecipe(
'adjustsp8', UnaryImm, size=2, ins=(), outs=(), 'adjustsp', Unary, size=1, ins=(GPR), outs=(),
emit='''
PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink);
modrm_rr(RU::rsp.into(), in_reg0, sink);
''')
adjustsp_ib = TailRecipe(
'adjustsp_ib', UnaryImm, size=2, ins=(), outs=(),
instp=IsSignedInt(UnaryImm.imm, 8), instp=IsSignedInt(UnaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(RU::rsp.into()), sink); PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -574,8 +581,8 @@ adjustsp8 = TailRecipe(
sink.put1(imm as u8); sink.put1(imm as u8);
''') ''')
adjustsp32 = TailRecipe( adjustsp_id = TailRecipe(
'adjustsp32', UnaryImm, size=5, ins=(), outs=(), 'adjustsp_id', UnaryImm, size=5, ins=(), outs=(),
instp=IsSignedInt(UnaryImm.imm, 32), instp=IsSignedInt(UnaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(RU::rsp.into()), sink); PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -1217,8 +1224,8 @@ fcmp = TailRecipe(
''') ''')
# XX /n, MI form with imm8. # XX /n, MI form with imm8.
rcmpib = TailRecipe( rcmp_ib = TailRecipe(
'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags, 'rcmp_ib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 8), instp=IsSignedInt(BinaryImm.imm, 8),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -1228,8 +1235,8 @@ rcmpib = TailRecipe(
''') ''')
# XX /n, MI form with imm32. # XX /n, MI form with imm32.
rcmpid = TailRecipe( rcmp_id = TailRecipe(
'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags, 'rcmp_id', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
instp=IsSignedInt(BinaryImm.imm, 32), instp=IsSignedInt(BinaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(in_reg0), sink); PUT_OP(bits, rex1(in_reg0), sink);
@@ -1401,8 +1408,8 @@ icscc = TailRecipe(
modrm_rr(out_reg0, 0, sink); modrm_rr(out_reg0, 0, sink);
''') ''')
icsccib = TailRecipe( icscc_ib = TailRecipe(
'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD, 'icscc_ib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 8), instp=IsSignedInt(IntCompareImm.imm, 8),
emit=''' emit='''
// Comparison instruction. // Comparison instruction.
@@ -1429,8 +1436,8 @@ icsccib = TailRecipe(
modrm_rr(out_reg0, 0, sink); modrm_rr(out_reg0, 0, sink);
''') ''')
icsccid = TailRecipe( icscc_id = TailRecipe(
'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD, 'icscc_id', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
instp=IsSignedInt(IntCompareImm.imm, 32), instp=IsSignedInt(IntCompareImm.imm, 32),
emit=''' emit='''
// Comparison instruction. // Comparison instruction.

View File

@@ -1,6 +1,9 @@
//! Naming well-known routines in the runtime library. //! Naming well-known routines in the runtime library.
use ir::{types, Opcode, Type}; use ir::{types, Opcode, Type, Inst, Function, FuncRef, ExternalName, Signature, AbiParam,
ExtFuncData, ArgumentPurpose};
use settings::CallConv;
use isa::{TargetIsa, RegUnit};
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
@@ -14,6 +17,9 @@ use std::str::FromStr;
/// This list is likely to grow over time. /// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall { pub enum LibCall {
/// probe for stack overflow. These are emitted for functions which need
/// when the `probestack_enabled` setting is true.
Probestack,
/// ceil.f32 /// ceil.f32
CeilF32, CeilF32,
/// ceil.f64 /// ceil.f64
@@ -32,7 +38,8 @@ pub enum LibCall {
NearestF64, NearestF64,
} }
const NAME: [&str; 8] = [ const NAME: [&str; 9] = [
"Probestack",
"CeilF32", "CeilF32",
"CeilF64", "CeilF64",
"FloorF32", "FloorF32",
@@ -54,6 +61,7 @@ impl FromStr for LibCall {
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
match s { match s {
"Probestack" => Ok(LibCall::Probestack),
"CeilF32" => Ok(LibCall::CeilF32), "CeilF32" => Ok(LibCall::CeilF32),
"CeilF64" => Ok(LibCall::CeilF64), "CeilF64" => Ok(LibCall::CeilF64),
"FloorF32" => Ok(LibCall::FloorF32), "FloorF32" => Ok(LibCall::FloorF32),
@@ -97,6 +105,96 @@ impl LibCall {
} }
} }
/// Get a function reference for `libcall` in `func`, following the signature
/// for `inst`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_libcall_funcref(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa))
}
/// Get a function reference for the probestack function in `func`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_probestack_funcref(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(LibCall::Probestack, func).unwrap_or_else(|| {
make_funcref_for_probestack(func, reg_type, arg_reg, isa)
})
}
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `LibCall::Probestack`.
fn make_funcref_for_probestack(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(CallConv::Probestack);
let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
sig.params.push(rax);
if !isa.flags().probestack_func_adjusts_sp() {
sig.returns.push(rax);
}
make_funcref(LibCall::Probestack, func, sig, isa)
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref_for_inst(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
// Start with a fast calling convention. We'll give the ISA a chance to change it.
let mut sig = Signature::new(isa.flags().call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
}
make_funcref(libcall, func, sig, isa)
}
/// Create a funcref for `libcall`.
fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef {
let sigref = func.import_signature(sig);
func.import_function(ExtFuncData {
name: ExternalName::LibCall(libcall),
signature: sigref,
colocated: isa.flags().colocated_libcalls(),
})
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View File

@@ -33,7 +33,7 @@ pub use ir::heap::{HeapBase, HeapData, HeapStyle};
pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs}; pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
pub use ir::jumptable::JumpTableData; pub use ir::jumptable::JumpTableData;
pub use ir::layout::Layout; pub use ir::layout::Layout;
pub use ir::libcall::LibCall; pub use ir::libcall::{LibCall, get_libcall_funcref, get_probestack_funcref};
pub use ir::memflags::MemFlags; pub use ir::memflags::MemFlags;
pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use ir::sourceloc::SourceLoc; pub use ir::sourceloc::SourceLoc;

View File

@@ -6,7 +6,8 @@ use cursor::{Cursor, CursorPosition, EncCursor};
use ir; use ir;
use ir::immediates::Imm64; use ir::immediates::Imm64;
use ir::stackslot::{StackOffset, StackSize}; use ir::stackslot::{StackOffset, StackSize};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc}; use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc,
get_probestack_funcref};
use isa::{RegClass, RegUnit, TargetIsa}; use isa::{RegClass, RegUnit, TargetIsa};
use regalloc::RegisterSet; use regalloc::RegisterSet;
use result; use result;
@@ -216,10 +217,16 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::Ct
} }
CallConv::Fastcall => unimplemented!("Windows calling conventions"), CallConv::Fastcall => unimplemented!("Windows calling conventions"),
CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa), CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
CallConv::Probestack => unimplemented!("probestack calling convention"),
} }
} }
pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult { pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
debug_assert!(
!isa.flags().probestack_enabled(),
"baldrdash does not expect cretonne to emit stack probes"
);
// Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes. // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
let stack_align = 16; let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
@@ -239,7 +246,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// newer versions use a 16-byte aligned stack pointer. // newer versions use a 16-byte aligned stack pointer.
let stack_align = 16; let stack_align = 16;
let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
let csr_type = if isa.flags().is_64bit() { let reg_type = if isa.flags().is_64bit() {
ir::types::I64 ir::types::I64
} else { } else {
ir::types::I32 ir::types::I32
@@ -266,7 +273,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// Add CSRs to function signature // Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg( let fp_arg = ir::AbiParam::special_reg(
csr_type, reg_type,
ir::ArgumentPurpose::FramePointer, ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit, RU::rbp as RegUnit,
); );
@@ -274,7 +281,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
func.signature.returns.push(fp_arg); func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) { for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr); let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg); func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg); func.signature.returns.push(csr_arg);
} }
@@ -282,11 +289,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// Set up the cursor and insert the prologue // Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block"); let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb); let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs); insert_system_v_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue // Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere); let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs); insert_system_v_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(()) Ok(())
} }
@@ -295,12 +302,13 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
fn insert_system_v_prologue( fn insert_system_v_prologue(
pos: &mut EncCursor, pos: &mut EncCursor,
stack_size: i64, stack_size: i64,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
isa: &TargetIsa,
) { ) {
// Append param to entry EBB // Append param to entry EBB
let ebb = pos.current_ebb().expect("missing ebb under cursor"); let ebb = pos.current_ebb().expect("missing ebb under cursor");
let fp = pos.func.dfg.append_ebb_param(ebb, csr_type); let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.ins().x86_push(fp); pos.ins().x86_push(fp);
@@ -311,7 +319,7 @@ fn insert_system_v_prologue(
for reg in csrs.iter(GPR) { for reg in csrs.iter(GPR) {
// Append param to entry EBB // Append param to entry EBB
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type); let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);
// Assign it a location // Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
@@ -320,8 +328,48 @@ fn insert_system_v_prologue(
pos.ins().x86_push(csr_arg); pos.ins().x86_push(csr_arg);
} }
// Allocate stack frame storage.
if stack_size > 0 { if stack_size > 0 {
pos.ins().adjust_sp_imm(Imm64::new(-stack_size)); if isa.flags().probestack_enabled() &&
stack_size > (1 << isa.flags().probestack_size_log2())
{
// Emit a stack probe.
let rax = RU::rax as RegUnit;
let rax_val = ir::ValueLoc::Reg(rax);
// The probestack function expects its input in %rax.
let arg = pos.ins().iconst(reg_type, stack_size);
pos.func.locations[arg] = rax_val;
// Call the probestack function.
let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
// Make the call.
let call = if !isa.flags().is_pic() && isa.flags().is_64bit() &&
!pos.func.dfg.ext_funcs[callee].colocated
{
// 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
// Use r11 as it may be clobbered under all supported calling conventions.
let r11 = RU::r11 as RegUnit;
let sig = pos.func.dfg.ext_funcs[callee].signature;
let addr = pos.ins().func_addr(reg_type, callee);
pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
pos.ins().call_indirect(sig, addr, &[arg])
} else {
// Otherwise just do a normal call.
pos.ins().call(callee, &[arg])
};
// If the probestack function doesn't adjust sp, do it ourselves.
if !isa.flags().probestack_func_adjusts_sp() {
let result = pos.func.dfg.inst_results(call)[0];
pos.func.locations[result] = rax_val;
pos.ins().adjust_sp_down(result);
}
} else {
// Simply decrement the stack pointer.
pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
}
} }
} }
@@ -329,14 +377,14 @@ fn insert_system_v_prologue(
fn insert_system_v_epilogues( fn insert_system_v_epilogues(
pos: &mut EncCursor, pos: &mut EncCursor,
stack_size: i64, stack_size: i64,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
) { ) {
while let Some(ebb) = pos.next_ebb() { while let Some(ebb) = pos.next_ebb() {
pos.goto_last_inst(ebb); pos.goto_last_inst(ebb);
if let Some(inst) = pos.current_inst() { if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() { if pos.func.dfg[inst].opcode().is_return() {
insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs); insert_system_v_epilogue(inst, stack_size, pos, reg_type, csrs);
} }
} }
} }
@@ -347,23 +395,23 @@ fn insert_system_v_epilogue(
inst: ir::Inst, inst: ir::Inst,
stack_size: i64, stack_size: i64,
pos: &mut EncCursor, pos: &mut EncCursor,
csr_type: ir::types::Type, reg_type: ir::types::Type,
csrs: &RegisterSet, csrs: &RegisterSet,
) { ) {
if stack_size > 0 { if stack_size > 0 {
pos.ins().adjust_sp_imm(Imm64::new(stack_size)); pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
} }
// Pop all the callee-saved registers, stepping backward each time to // Pop all the callee-saved registers, stepping backward each time to
// preserve the correct order. // preserve the correct order.
let fp_ret = pos.ins().x86_pop(csr_type); let fp_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst(); pos.prev_inst();
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret); pos.func.dfg.append_inst_arg(inst, fp_ret);
for reg in csrs.iter(GPR) { for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(csr_type); let csr_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst(); pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg); pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);

View File

@@ -1,7 +1,7 @@
//! Expanding instructions as runtime library calls. //! Expanding instructions as runtime library calls.
use ir; use ir;
use ir::InstBuilder; use ir::{InstBuilder, get_libcall_funcref};
use std::vec::Vec; use std::vec::Vec;
use isa::TargetIsa; use isa::TargetIsa;
@@ -14,58 +14,14 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIs
None => return false, None => return false,
}; };
let funcref =
find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func, isa));
// Now we convert `inst` to a call. First save the arguments. // Now we convert `inst` to a call. First save the arguments.
let mut args = Vec::new(); let mut args = Vec::new();
args.extend_from_slice(func.dfg.inst_args(inst)); args.extend_from_slice(func.dfg.inst_args(inst));
// The replace builder will preserve the instruction result values. // The replace builder will preserve the instruction result values.
let funcref = get_libcall_funcref(libcall, func, inst, isa);
func.dfg.replace(inst).call(funcref, &args); func.dfg.replace(inst).call(funcref, &args);
// TODO: ask the ISA to legalize the signature. // TODO: ask the ISA to legalize the signature.
true true
} }
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ir::ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref(
libcall: ir::LibCall,
inst: ir::Inst,
func: &mut ir::Function,
isa: &TargetIsa,
) -> ir::FuncRef {
// Start with a fast calling convention. We'll give the ISA a chance to change it.
let mut sig = ir::Signature::new(isa.flags().call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v)));
}
let sigref = func.import_signature(sig);
// TODO: Can libcalls be colocated in some circumstances?
func.import_function(ir::ExtFuncData {
name: ir::ExternalName::LibCall(libcall),
signature: sigref,
colocated: false,
})
}

View File

@@ -363,6 +363,7 @@ mod tests {
is_64bit = false\n\ is_64bit = false\n\
call_conv = \"fast\"\n\ call_conv = \"fast\"\n\
is_pic = false\n\ is_pic = false\n\
colocated_libcalls = false\n\
return_at_end = false\n\ return_at_end = false\n\
avoid_div_traps = false\n\ avoid_div_traps = false\n\
is_compressed = false\n\ is_compressed = false\n\
@@ -370,7 +371,10 @@ mod tests {
enable_simd = true\n\ enable_simd = true\n\
enable_atomics = true\n\ enable_atomics = true\n\
baldrdash_prologue_words = 0\n\ baldrdash_prologue_words = 0\n\
allones_funcaddrs = false\n" allones_funcaddrs = false\n\
probestack_enabled = true\n\
probestack_func_adjusts_sp = false\n\
probestack_size_log2 = 12\n"
); );
assert_eq!(f.opt_level(), super::OptLevel::Default); assert_eq!(f.opt_level(), super::OptLevel::Default);
assert_eq!(f.enable_simd(), true); assert_eq!(f.enable_simd(), true);