diff --git a/cranelift/filetests/isa/x86/binary32.cton b/cranelift/filetests/isa/x86/binary32.cton index 6eb4565f79..9ce08fb66d 100644 --- a/cranelift/filetests/isa/x86/binary32.cton +++ b/cranelift/filetests/isa/x86/binary32.cton @@ -392,19 +392,37 @@ ebb0: ; asm: popl %ecx [-,%rcx] v512 = x86_pop.i32 ; bin: 59 - ; Adjust Stack Pointer + ; Adjust Stack Pointer Up ; asm: addl $64, %esp - adjust_sp_imm 64 ; bin: 83 c4 40 + adjust_sp_up_imm 64 ; bin: 83 c4 40 ; asm: addl $-64, %esp - adjust_sp_imm -64 ; bin: 83 c4 c0 + adjust_sp_up_imm -64 ; bin: 83 c4 c0 ; asm: addl $1024, %esp - adjust_sp_imm 1024 ; bin: 81 c4 00000400 + adjust_sp_up_imm 1024 ; bin: 81 c4 00000400 ; asm: addl $-1024, %esp - adjust_sp_imm -1024 ; bin: 81 c4 fffffc00 + adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00 ; asm: addl $2147483647, %esp - adjust_sp_imm 2147483647 ; bin: 81 c4 7fffffff + adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff ; asm: addl $-2147483648, %esp - adjust_sp_imm -2147483648 ; bin: 81 c4 80000000 + adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000 + + ; Adjust Stack Pointer Down + ; asm: subl %ecx, %esp + adjust_sp_down v1 ; bin: 29 cc + ; asm: subl %esi, %esp + adjust_sp_down v2 ; bin: 29 f4 + ; asm: subl $64, %esp + adjust_sp_down_imm 64 ; bin: 83 ec 40 + ; asm: subl $-64, %esp + adjust_sp_down_imm -64 ; bin: 83 ec c0 + ; asm: subl $1024, %esp + adjust_sp_down_imm 1024 ; bin: 81 ec 00000400 + ; asm: subl $-1024, %esp + adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00 + ; asm: subl $2147483647, %esp + adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff + ; asm: subl $-2147483648, %esp + adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000 ; Shift immediates ; asm: shll $2, %esi diff --git a/cranelift/filetests/isa/x86/binary64.cton b/cranelift/filetests/isa/x86/binary64.cton index 6417cd26e2..a9b96a86dc 100644 --- a/cranelift/filetests/isa/x86/binary64.cton +++ b/cranelift/filetests/isa/x86/binary64.cton @@ -547,19 +547,37 @@ ebb0: ; asm: popq %r10 [-,%r10] v514 = x86_pop.i64
; bin: 41 5a - ; Adjust Stack Pointer + ; Adjust Stack Pointer Up ; asm: addq $64, %rsp - adjust_sp_imm 64 ; bin: 48 83 c4 40 + adjust_sp_up_imm 64 ; bin: 48 83 c4 40 ; asm: addq $-64, %rsp - adjust_sp_imm -64 ; bin: 48 83 c4 c0 + adjust_sp_up_imm -64 ; bin: 48 83 c4 c0 ; asm: addq $1024, %rsp - adjust_sp_imm 1024 ; bin: 48 81 c4 00000400 + adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400 ; asm: addq $-1024, %rsp - adjust_sp_imm -1024 ; bin: 48 81 c4 fffffc00 + adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00 ; asm: addq $2147483647, %rsp - adjust_sp_imm 2147483647 ; bin: 48 81 c4 7fffffff + adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff ; asm: addq $-2147483648, %rsp - adjust_sp_imm -2147483648 ; bin: 48 81 c4 80000000 + adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000 + + ; Adjust Stack Pointer Down + ; asm: subq %rcx, %rsp + adjust_sp_down v1 ; bin: 48 29 cc + ; asm: subq %r10, %rsp + adjust_sp_down v3 ; bin: 4c 29 d4 + ; asm: subq $64, %rsp + adjust_sp_down_imm 64 ; bin: 48 83 ec 40 + ; asm: subq $-64, %rsp + adjust_sp_down_imm -64 ; bin: 48 83 ec c0 + ; asm: subq $1024, %rsp + adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400 + ; asm: subq $-1024, %rsp + adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00 + ; asm: subq $2147483647, %rsp + adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff + ; asm: subq $-2147483648, %rsp + adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000 ; Shift immediates ; asm: shlq $12, %rsi diff --git a/cranelift/filetests/isa/x86/probestack-adjusts-sp.cton b/cranelift/filetests/isa/x86/probestack-adjusts-sp.cton new file mode 100644 index 0000000000..2204886046 --- /dev/null +++ b/cranelift/filetests/isa/x86/probestack-adjusts-sp.cton @@ -0,0 +1,29 @@ +test compile +set is_64bit=1 +set colocated_libcalls=1 +set probestack_func_adjusts_sp=1 +isa x86 + +; Like %big in probestack.cton, but with the probestack function adjusting +; the stack pointer itself. 
+ +function %big() system_v { + ss0 = explicit_slot 300000 +ebb0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 +; nextln: [Op1call_id#e8] call fn0(v1) +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64 +; nextln: [Op1ret#c3] return v2 +; nextln: } diff --git a/cranelift/filetests/isa/x86/probestack-disabled.cton b/cranelift/filetests/isa/x86/probestack-disabled.cton new file mode 100644 index 0000000000..282e088a3a --- /dev/null +++ b/cranelift/filetests/isa/x86/probestack-disabled.cton @@ -0,0 +1,25 @@ +test compile +set is_64bit=1 +set colocated_libcalls=1 +set probestack_enabled=0 +isa x86 + +; Like %big in probestack.cton, but with probes disabled. 
+ +function %big() system_v { + ss0 = explicit_slot 300000 +ebb0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } diff --git a/cranelift/filetests/isa/x86/probestack-noncolocated.cton b/cranelift/filetests/isa/x86/probestack-noncolocated.cton new file mode 100644 index 0000000000..3248f4a142 --- /dev/null +++ b/cranelift/filetests/isa/x86/probestack-noncolocated.cton @@ -0,0 +1,28 @@ +test compile +set is_64bit=1 +isa x86 + +; Like %big in probestack.cton, but without a colocated libcall. + +function %big() system_v { + ss0 = explicit_slot 300000 +ebb0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = %Probestack sig0 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 +; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0 +; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64 +; nextln: [Op1ret#c3] return v4 +; nextln: } diff --git a/cranelift/filetests/isa/x86/probestack-size.cton b/cranelift/filetests/isa/x86/probestack-size.cton new file mode 
100644 index 0000000000..d7c92a5aa5 --- /dev/null +++ b/cranelift/filetests/isa/x86/probestack-size.cton @@ -0,0 +1,75 @@ +test compile +set is_64bit=1 +set colocated_libcalls=1 +set probestack_size_log2=13 +isa x86 + +; Like %big in probestack.cton, but now the probestack size is bigger +; and it no longer needs a probe. + +function %big() system_v { + ss0 = explicit_slot 4097 +ebb0: + return +} + +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4097, offset -4113 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } + + +; Like %big; still doesn't need a probe. + +function %bigger() system_v { + ss0 = explicit_slot 8192 +ebb0: + return +} + +; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 8192, offset -8208 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } + + +; Like %bigger; this needs a probe. 
+ +function %biggest() system_v { + ss0 = explicit_slot 8193 +ebb0: + return +} + +; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 8193, offset -8209 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208 +; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208 +; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 +; nextln: [Op1ret#c3] return v3 +; nextln: } diff --git a/cranelift/filetests/isa/x86/probestack.cton b/cranelift/filetests/isa/x86/probestack.cton new file mode 100644 index 0000000000..8b961da3bd --- /dev/null +++ b/cranelift/filetests/isa/x86/probestack.cton @@ -0,0 +1,50 @@ +test compile +set is_64bit=1 +set colocated_libcalls=1 +isa x86 + +; A function with a big stack frame. This should have a stack probe. 
+ +function %big() system_v { + ss0 = explicit_slot 4097 +ebb0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4097, offset -4113 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112 +; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 +; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 +; nextln: [Op1ret#c3] return v3 +; nextln: } + + +; A function with a small enough stack frame. This shouldn't have a stack probe. + +function %small() system_v { + ss0 = explicit_slot 4096 +ebb0: + return +} + +; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4096, offset -4112 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: ebb0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } diff --git a/cranelift/filetests/isa/x86/prologue-epilogue.cton b/cranelift/filetests/isa/x86/prologue-epilogue.cton index bf4bf298b5..41d2147438 100644 --- a/cranelift/filetests/isa/x86/prologue-epilogue.cton +++ b/cranelift/filetests/isa/x86/prologue-epilogue.cton @@ -36,8 +36,8 @@ ebb0: ; nextln: ebb0(v0: i64 [%rbp]): ; nextln: x86_push v0 ; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_imm -176 -; nextln: adjust_sp_imm 176 +; nextln: adjust_sp_down_imm 176 +; nextln: adjust_sp_up_imm 176 ; nextln: v1 = 
x86_pop.i64 ; nextln: return v1 ; nextln: } @@ -109,7 +109,7 @@ ebb0(v0: i64, v1: i64): ; nextln: x86_push v18 ; nextln: x86_push v19 ; nextln: x86_push v20 -; nextln: adjust_sp_imm -8 +; nextln: adjust_sp_down_imm 8 ; nextln: v2 = load.i32 v0 ; nextln: v3 = load.i32 v0+8 ; nextln: v4 = load.i32 v0+16 @@ -136,7 +136,7 @@ ebb0(v0: i64, v1: i64): ; nextln: store v12, v1+80 ; nextln: store v13, v1+88 ; nextln: store v14, v1+96 -; nextln: adjust_sp_imm 8 +; nextln: adjust_sp_up_imm 8 ; nextln: v26 = x86_pop.i64 ; nextln: v25 = x86_pop.i64 ; nextln: v24 = x86_pop.i64 @@ -192,13 +192,13 @@ ebb0(v0: i64, v1: i64): ; nextln: x86_push v51 ; nextln: x86_push v52 ; nextln: x86_push v53 -; nextln: adjust_sp_imm +; nextln: adjust_sp_down_imm ; check: spill ; check: fill -; check: adjust_sp_imm +; check: adjust_sp_up_imm ; nextln: v59 = x86_pop.i64 ; nextln: v58 = x86_pop.i64 ; nextln: v57 = x86_pop.i64 diff --git a/cranelift/filetests/postopt/basic.cton b/cranelift/filetests/postopt/basic.cton index 48b6b66007..678cecc27c 100644 --- a/cranelift/filetests/postopt/basic.cton +++ b/cranelift/filetests/postopt/basic.cton @@ -10,7 +10,7 @@ ebb0(v0: i32, v1: i32): [Op1ret#c3] return v1 ebb1: -[Op1puid#b8,%rax] v8 = iconst.i32 3 +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 [Op1ret#c3] return v8 } ; sameln: function %br_icmp @@ -34,7 +34,7 @@ ebb0(v0: i32, v1: i32): [Op1ret#c3] return v1 ebb1: -[Op1puid#b8,%rax] v8 = iconst.i32 3 +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 [Op1ret#c3] return v8 } ; sameln: function %br_icmp_inverse @@ -53,12 +53,12 @@ ebb1: function %br_icmp_imm(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): -[Op1icsccib#7083] v2 = icmp_imm slt v0, 2 +[Op1icscc_ib#7083] v2 = icmp_imm slt v0, 2 [Op1t8jccd_long#84] brz v2, ebb1 [Op1ret#c3] return v1 ebb1: -[Op1puid#b8,%rax] v8 = iconst.i32 3 +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 [Op1ret#c3] return v8 } ; sameln: function %br_icmp_imm @@ -82,7 +82,7 @@ ebb0(v0: f32, v1: f32): [Op1ret#c3] return v1 ebb1: -[Op1puid#b8,%rax] v18 = iconst.i32 
0x40a8_0000 +[Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000 [Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 [Op1ret#c3] return v8 } diff --git a/cranelift/filetests/regalloc/coloring-227.cton b/cranelift/filetests/regalloc/coloring-227.cton index 8144bba62e..2b327633c7 100644 --- a/cranelift/filetests/regalloc/coloring-227.cton +++ b/cranelift/filetests/regalloc/coloring-227.cton @@ -7,8 +7,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) heap0 = static gv0, min 0, bound 0x0001_0000_0000, guard 0x8000_0000 ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): -@0001 [RexOp1puid#b8] v5 = iconst.i32 0 -@0003 [RexOp1puid#b8] v6 = iconst.i32 0 +@0001 [RexOp1pu_id#b8] v5 = iconst.i32 0 +@0003 [RexOp1pu_id#b8] v6 = iconst.i32 0 @0005 [RexOp1tjccb#74] brz v6, ebb10 @0007 [RexOp1jmpb#eb] jump ebb3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) @@ -16,10 +16,10 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) @000b [RexOp1jmpb#eb] jump ebb6 ebb6: -@000d [RexOp1puid#b8] v8 = iconst.i32 0 +@000d [RexOp1pu_id#b8] v8 = iconst.i32 0 @000f [RexOp1tjccb#75] brnz v8, ebb5 -@0011 [RexOp1puid#b8] v9 = iconst.i32 0 -@0015 [RexOp1puid#b8] v11 = iconst.i32 0 +@0011 [RexOp1pu_id#b8] v9 = iconst.i32 0 +@0015 [RexOp1pu_id#b8] v11 = iconst.i32 0 @0017 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11 @0017 [RexOp2urm_noflags#4b6] v13 = bint.i32 v12 @001a [RexOp1rr#21] v14 = band v9, v13 @@ -28,11 +28,11 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb7: @0020 [RexOp1tjccb#74] brz.i32 v17, ebb8 -@0022 [RexOp1puid#b8] v18 = iconst.i32 0 +@0022 [RexOp1pu_id#b8] v18 = iconst.i32 0 @0024 [RexOp1tjccb#74] brz v18, ebb9 -@0028 [RexOp1puid#b8] v21 = iconst.i32 0 +@0028 [RexOp1pu_id#b8] v21 = iconst.i32 0 @002a [RexOp1umr#89] v79 = uextend.i64 v5 -@002a [RexOp1rib#8083] v80 = iadd_imm.i64 v4, 0 +@002a [RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 @002a [RexOp1ld#808b] v81 = load.i64 v80 @002a [RexOp1rr#8001] v22 = 
iadd v81, v79 @002a [RexMp1st#189] istore16 v21, v22 @@ -42,8 +42,8 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) @002e [RexOp1jmpb#eb] jump ebb8 ebb8: -@0033 [RexOp1puid#b8] v27 = iconst.i32 3 -@0035 [RexOp1puid#b8] v28 = iconst.i32 4 +@0033 [RexOp1pu_id#b8] v27 = iconst.i32 3 +@0035 [RexOp1pu_id#b8] v28 = iconst.i32 4 @003b [RexOp1rr#09] v35 = bor.i32 v31, v13 @003c [RexOp1tjccb#75] brnz v35, ebb15(v27) @003c [RexOp1jmpb#eb] jump ebb15(v28) @@ -58,25 +58,25 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) @0045 [RexOp1jmpb#eb] jump ebb2(v40, v47, v54, v61, v68, v75) ebb10: -@0046 [RexOp1puid#b8] v43 = iconst.i32 0 +@0046 [RexOp1pu_id#b8] v43 = iconst.i32 0 @0048 [RexOp1jmpb#eb] jump ebb2(v43, v5, v0, v1, v2, v3) ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): -@004c [RexOp1puid#b8] v44 = iconst.i32 0 +@004c [RexOp1pu_id#b8] v44 = iconst.i32 0 @004e [RexOp1tjccb#74] brz v44, ebb12 -@0052 [RexOp1puid#b8] v50 = iconst.i32 11 +@0052 [RexOp1pu_id#b8] v50 = iconst.i32 11 @0054 [RexOp1tjccb#74] brz v50, ebb14 @0058 [RexOp1umr#89] v82 = uextend.i64 v52 -@0058 [RexOp1rib#8083] v83 = iadd_imm.i64 v4, 0 +@0058 [RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 @0058 [RexOp1ld#808b] v84 = load.i64 v83 @0058 [RexOp1rr#8001] v57 = iadd v84, v82 @0058 [RexOp1ld#8b] v58 = load.i32 v57 @005d [RexOp1umr#89] v85 = uextend.i64 v58 -@005d [RexOp1rib#8083] v86 = iadd_imm.i64 v4, 0 +@005d [RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 @005d [RexOp1ld#808b] v87 = load.i64 v86 @005d [RexOp1rr#8001] v64 = iadd v87, v85 @005d [RexOp1st#88] istore8 v59, v64 -@0060 [RexOp1puid#b8] v65 = iconst.i32 0 +@0060 [RexOp1pu_id#b8] v65 = iconst.i32 0 @0062 [RexOp1jmpb#eb] jump ebb13(v65) ebb14: @@ -84,7 +84,7 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb13(v51: i32): @0066 [RexOp1umr#89] v88 = uextend.i64 v45 -@0066 [RexOp1rib#8083] v89 = iadd_imm.i64 v4, 0 +@0066 [RexOp1r_ib#8083] 
v89 = iadd_imm.i64 v4, 0 @0066 [RexOp1ld#808b] v90 = load.i64 v89 @0066 [RexOp1rr#8001] v71 = iadd v90, v88 @0066 [RexOp1st#89] store v51, v71 diff --git a/cranelift/filetests/regalloc/unreachable_code.cton b/cranelift/filetests/regalloc/unreachable_code.cton index 34d8c4c164..8bfffc22a2 100644 --- a/cranelift/filetests/regalloc/unreachable_code.cton +++ b/cranelift/filetests/regalloc/unreachable_code.cton @@ -2,6 +2,7 @@ test compile set is_64bit +set probestack_enabled=0 isa x86 haswell ; This function contains unreachable blocks which trip up the register diff --git a/lib/codegen/meta/base/instructions.py b/lib/codegen/meta/base/instructions.py index c54f3ef570..8fee97ae42 100644 --- a/lib/codegen/meta/base/instructions.py +++ b/lib/codegen/meta/base/instructions.py @@ -591,12 +591,25 @@ stack_check = Instruction( The global variable must be accessible and naturally aligned for a pointer-sized value. + + `stack_check` is an alternative way to detect stack overflow, when using + a calling convention that doesn't perform stack probes. """, ins=GV, can_trap=True) +delta = Operand('delta', Int) +adjust_sp_down = Instruction( + 'adjust_sp_down', r""" + Subtracts ``delta`` offset value from the stack pointer register. + + This instruction is used to adjust the stack pointer by a dynamic amount. + """, + ins=(delta,), + other_side_effects=True) + StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') -adjust_sp_imm = Instruction( - 'adjust_sp_imm', r""" +adjust_sp_up_imm = Instruction( + 'adjust_sp_up_imm', r""" Adds ``Offset`` immediate offset value to the stack pointer register. 
This instruction is used to adjust the stack pointer, primarily in function @@ -606,6 +619,19 @@ adjust_sp_imm = Instruction( ins=(StackOffset,), other_side_effects=True) +StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') +adjust_sp_down_imm = Instruction( + 'adjust_sp_down_imm', r""" + Subtracts ``Offset`` immediate offset value from the stack pointer + register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + """, + ins=(StackOffset,), + other_side_effects=True) + f = Operand('f', iflags) ifcmp_sp = Instruction( diff --git a/lib/codegen/meta/base/settings.py b/lib/codegen/meta/base/settings.py index 580f0ee33e..32df3d63b4 100644 --- a/lib/codegen/meta/base/settings.py +++ b/lib/codegen/meta/base/settings.py @@ -38,17 +38,27 @@ call_conv = EnumSetting( - system_v: System V-style convention used on many platforms - fastcall: Windows "fastcall" convention, also used for x64 and ARM - baldrdash: SpiderMonkey WebAssembly convention + - probestack: specialized convention for the probestack function The default calling convention may be overridden by individual functions. """, - 'fast', 'cold', 'system_v', 'fastcall', 'baldrdash') + 'fast', 'cold', 'system_v', 'fastcall', 'baldrdash', 'probestack') # Note that Cretonne doesn't currently need an is_pie flag, because PIE is just # PIC where symbols can't be pre-empted, which can be expressed with the # `colocated` flag on external functions and global variables. is_pic = BoolSetting("Enable Position-Independent Code generation") +colocated_libcalls = BoolSetting( + """ + Use colocated libcalls. + + Generate code that assumes that libcalls can be declared "colocated", + meaning they will be defined along with the current function, such that + they can use more efficient addressing. 
+ """) + return_at_end = BoolSetting( """ Generate functions with at most a single return instruction at the @@ -115,4 +125,31 @@ allones_funcaddrs = BoolSetting( Emit not-yet-relocated function addresses as all-ones bit patterns. """) +# +# Stack probing options. +# +probestack_enabled = BoolSetting( + """ + Enable the use of stack probes, for calling conventions which support + this functionality. + """, + default=True) + +probestack_func_adjusts_sp = BoolSetting( + """ + Set this to true of the stack probe function modifies the stack pointer + itself. + """) + +probestack_size_log2 = NumSetting( + """ + The log2 of the size of the stack guard region. + + Stack frames larger than this size will have stack overflow checked + by calling the probestack function. + + The default is 12, which translates to a size of 4096. + """, + default=12) + group.close(globals()) diff --git a/lib/codegen/meta/isa/x86/encodings.py b/lib/codegen/meta/isa/x86/encodings.py index 8b3244222b..65c28b1191 100644 --- a/lib/codegen/meta/isa/x86/encodings.py +++ b/lib/codegen/meta/isa/x86/encodings.py @@ -136,29 +136,29 @@ for inst, rrr in [ (base.band_imm, 4), (base.bor_imm, 1), (base.bxor_imm, 6)]: - enc_i32_i64(inst, r.rib, 0x83, rrr=rrr) - enc_i32_i64(inst, r.rid, 0x81, rrr=rrr) + enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr) + enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr) # TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks. # Immediate constants. -X86_32.enc(base.iconst.i32, *r.puid(0xb8)) +X86_32.enc(base.iconst.i32, *r.pu_id(0xb8)) -X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8)) -X86_64.enc(base.iconst.i32, *r.puid(0xb8)) +X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8)) +X86_64.enc(base.iconst.i32, *r.pu_id(0xb8)) # The 32-bit immediate movl also zero-extends to 64 bits. 
-X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8), +X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8), instp=IsUnsignedInt(UnaryImm.imm, 32)) -X86_64.enc(base.iconst.i64, *r.puid(0xb8), +X86_64.enc(base.iconst.i64, *r.pu_id(0xb8), instp=IsUnsignedInt(UnaryImm.imm, 32)) # Sign-extended 32-bit immediate. -X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) +X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1)) # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. -X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) +X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1)) # bool constants. -enc_both(base.bconst.b1, r.puid_bool, 0xb8) +enc_both(base.bconst.b1, r.pu_id_bool, 0xb8) # Shifts and rotates. # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit @@ -180,7 +180,7 @@ for inst, rrr in [ (base.ishl_imm, 4), (base.ushr_imm, 5), (base.sshr_imm, 7)]: - enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr) + enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr) # Population count. X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) @@ -254,11 +254,21 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58) X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) X86_32.enc(base.copy_special, *r.copysp(0x89)) -# Adjust SP Imm -X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83)) -X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81)) -X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1)) -X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1)) +# Adjust SP down by a dynamic value (or up, with a negative operand). 
+X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29)) +X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1)) + +# Adjust SP up by an immediate (or down, with a negative immediate) +X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83)) +X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81)) +X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1)) +X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1)) + +# Adjust SP down by an immediate (or up, with a negative immediate) +X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5)) +X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5)) +X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1)) +X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1)) # # Float loads and stores. @@ -406,11 +416,11 @@ X86_64.enc(base.trapff, r.trapff, 0) # Comparisons # enc_i32_i64(base.icmp, r.icscc, 0x39) -enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7) -enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7) +enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7) +enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7) enc_i32_i64(base.ifcmp, r.rcmp, 0x39) -enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7) -enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7) +enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7) +enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7) # TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39)) diff --git a/lib/codegen/meta/isa/x86/recipes.py b/lib/codegen/meta/isa/x86/recipes.py index 0f9a79d6a7..bfe57d8bcb 100644 --- a/lib/codegen/meta/isa/x86/recipes.py +++ b/lib/codegen/meta/isa/x86/recipes.py @@ -480,8 +480,8 @@ mulx = TailRecipe( ''') # XX /n ib with 8-bit immediate sign-extended. 
-rib = TailRecipe( - 'rib', BinaryImm, size=2, ins=GPR, outs=0, +r_ib = TailRecipe( + 'r_ib', BinaryImm, size=2, ins=GPR, outs=0, instp=IsSignedInt(BinaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -491,8 +491,8 @@ rib = TailRecipe( ''') # XX /n id with 32-bit immediate sign-extended. -rid = TailRecipe( - 'rid', BinaryImm, size=5, ins=GPR, outs=0, +r_id = TailRecipe( + 'r_id', BinaryImm, size=5, ins=GPR, outs=0, instp=IsSignedInt(BinaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -502,8 +502,8 @@ rid = TailRecipe( ''') # XX /n id with 32-bit immediate sign-extended. UnaryImm version. -uid = TailRecipe( - 'uid', UnaryImm, size=5, ins=(), outs=GPR, +u_id = TailRecipe( + 'u_id', UnaryImm, size=5, ins=(), outs=GPR, instp=IsSignedInt(UnaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(out_reg0), sink); @@ -513,8 +513,8 @@ uid = TailRecipe( ''') # XX+rd id unary with 32-bit immediate. Note no recipe predicate. -puid = TailRecipe( - 'puid', UnaryImm, size=4, ins=(), outs=GPR, +pu_id = TailRecipe( + 'pu_id', UnaryImm, size=4, ins=(), outs=GPR, emit=''' // The destination register is encoded in the low bits of the opcode. // No ModR/M. @@ -524,8 +524,8 @@ puid = TailRecipe( ''') # XX+rd id unary with bool immediate. Note no recipe predicate. -puid_bool = TailRecipe( - 'puid_bool', UnaryBool, size=4, ins=(), outs=GPR, +pu_id_bool = TailRecipe( + 'pu_id_bool', UnaryBool, size=4, ins=(), outs=GPR, emit=''' // The destination register is encoded in the low bits of the opcode. // No ModR/M. @@ -535,8 +535,8 @@ puid_bool = TailRecipe( ''') # XX+rd iq unary with 64-bit immediate. 
-puiq = TailRecipe( - 'puiq', UnaryImm, size=8, ins=(), outs=GPR, +pu_iq = TailRecipe( + 'pu_iq', UnaryImm, size=8, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); let imm: i64 = imm.into(); @@ -564,8 +564,15 @@ copysp = TailRecipe( modrm_rr(dst, src, sink); ''') -adjustsp8 = TailRecipe( - 'adjustsp8', UnaryImm, size=2, ins=(), outs=(), +adjustsp = TailRecipe( + 'adjustsp', Unary, size=1, ins=(GPR), outs=(), + emit=''' + PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + ''') + +adjustsp_ib = TailRecipe( + 'adjustsp_ib', UnaryImm, size=2, ins=(), outs=(), instp=IsSignedInt(UnaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(RU::rsp.into()), sink); @@ -574,8 +581,8 @@ adjustsp8 = TailRecipe( sink.put1(imm as u8); ''') -adjustsp32 = TailRecipe( - 'adjustsp32', UnaryImm, size=5, ins=(), outs=(), +adjustsp_id = TailRecipe( + 'adjustsp_id', UnaryImm, size=5, ins=(), outs=(), instp=IsSignedInt(UnaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(RU::rsp.into()), sink); @@ -1217,8 +1224,8 @@ fcmp = TailRecipe( ''') # XX /n, MI form with imm8. -rcmpib = TailRecipe( - 'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags, +rcmp_ib = TailRecipe( + 'rcmp_ib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags, instp=IsSignedInt(BinaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -1228,8 +1235,8 @@ rcmpib = TailRecipe( ''') # XX /n, MI form with imm32. -rcmpid = TailRecipe( - 'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags, +rcmp_id = TailRecipe( + 'rcmp_id', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags, instp=IsSignedInt(BinaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -1401,8 +1408,8 @@ icscc = TailRecipe( modrm_rr(out_reg0, 0, sink); ''') -icsccib = TailRecipe( - 'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD, +icscc_ib = TailRecipe( + 'icscc_ib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD, instp=IsSignedInt(IntCompareImm.imm, 8), emit=''' // Comparison instruction. 
@@ -1429,8 +1436,8 @@ icsccib = TailRecipe( modrm_rr(out_reg0, 0, sink); ''') -icsccid = TailRecipe( - 'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD, +icscc_id = TailRecipe( + 'icscc_id', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD, instp=IsSignedInt(IntCompareImm.imm, 32), emit=''' // Comparison instruction. diff --git a/lib/codegen/src/ir/libcall.rs b/lib/codegen/src/ir/libcall.rs index 12216522ae..8b8e2a0599 100644 --- a/lib/codegen/src/ir/libcall.rs +++ b/lib/codegen/src/ir/libcall.rs @@ -1,6 +1,9 @@ //! Naming well-known routines in the runtime library. -use ir::{types, Opcode, Type}; +use ir::{types, Opcode, Type, Inst, Function, FuncRef, ExternalName, Signature, AbiParam, + ExtFuncData, ArgumentPurpose}; +use settings::CallConv; +use isa::{TargetIsa, RegUnit}; use std::fmt; use std::str::FromStr; @@ -14,6 +17,9 @@ use std::str::FromStr; /// This list is likely to grow over time. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum LibCall { + /// probe for stack overflow. These are emitted for functions which need + /// when the `probestack_enabled` setting is true. + Probestack, /// ceil.f32 CeilF32, /// ceil.f64 @@ -32,7 +38,8 @@ pub enum LibCall { NearestF64, } -const NAME: [&str; 8] = [ +const NAME: [&str; 9] = [ + "Probestack", "CeilF32", "CeilF64", "FloorF32", @@ -54,6 +61,7 @@ impl FromStr for LibCall { fn from_str(s: &str) -> Result { match s { + "Probestack" => Ok(LibCall::Probestack), "CeilF32" => Ok(LibCall::CeilF32), "CeilF64" => Ok(LibCall::CeilF64), "FloorF32" => Ok(LibCall::FloorF32), @@ -97,6 +105,96 @@ impl LibCall { } } +/// Get a function reference for `libcall` in `func`, following the signature +/// for `inst`. +/// +/// If there is an existing reference, use it, otherwise make a new one. 
+pub fn get_libcall_funcref( + libcall: LibCall, + func: &mut Function, + inst: Inst, + isa: &TargetIsa, +) -> FuncRef { + find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa)) +} + +/// Get a function reference for the probestack function in `func`. +/// +/// If there is an existing reference, use it, otherwise make a new one. +pub fn get_probestack_funcref( + func: &mut Function, + reg_type: Type, + arg_reg: RegUnit, + isa: &TargetIsa, +) -> FuncRef { + find_funcref(LibCall::Probestack, func).unwrap_or_else(|| { + make_funcref_for_probestack(func, reg_type, arg_reg, isa) + }) +} + +/// Get the existing function reference for `libcall` in `func` if it exists. +fn find_funcref(libcall: LibCall, func: &Function) -> Option { + // We're assuming that all libcall function decls are at the end. + // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless. + for (fref, func_data) in func.dfg.ext_funcs.iter().rev() { + match func_data.name { + ExternalName::LibCall(lc) => { + if lc == libcall { + return Some(fref); + } + } + _ => break, + } + } + None +} + +/// Create a funcref for `LibCall::Probestack`. +fn make_funcref_for_probestack( + func: &mut Function, + reg_type: Type, + arg_reg: RegUnit, + isa: &TargetIsa, +) -> FuncRef { + let mut sig = Signature::new(CallConv::Probestack); + let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg); + sig.params.push(rax); + if !isa.flags().probestack_func_adjusts_sp() { + sig.returns.push(rax); + } + make_funcref(LibCall::Probestack, func, sig, isa) +} + +/// Create a funcref for `libcall` with a signature matching `inst`. +fn make_funcref_for_inst( + libcall: LibCall, + func: &mut Function, + inst: Inst, + isa: &TargetIsa, +) -> FuncRef { + // Start with a fast calling convention. We'll give the ISA a chance to change it. 
+ let mut sig = Signature::new(isa.flags().call_conv()); + for &v in func.dfg.inst_args(inst) { + sig.params.push(AbiParam::new(func.dfg.value_type(v))); + } + for &v in func.dfg.inst_results(inst) { + sig.returns.push(AbiParam::new(func.dfg.value_type(v))); + } + + make_funcref(libcall, func, sig, isa) +} + +/// Create a funcref for `libcall`. +fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef { + let sigref = func.import_signature(sig); + + func.import_function(ExtFuncData { + name: ExternalName::LibCall(libcall), + signature: sigref, + colocated: isa.flags().colocated_libcalls(), + }) +} + #[cfg(test)] mod test { use super::*; diff --git a/lib/codegen/src/ir/mod.rs b/lib/codegen/src/ir/mod.rs index ab14a305aa..3ee7dc9176 100644 --- a/lib/codegen/src/ir/mod.rs +++ b/lib/codegen/src/ir/mod.rs @@ -33,7 +33,7 @@ pub use ir::heap::{HeapBase, HeapData, HeapStyle}; pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs}; pub use ir::jumptable::JumpTableData; pub use ir::layout::Layout; -pub use ir::libcall::LibCall; +pub use ir::libcall::{LibCall, get_libcall_funcref, get_probestack_funcref}; pub use ir::memflags::MemFlags; pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use ir::sourceloc::SourceLoc; diff --git a/lib/codegen/src/isa/x86/abi.rs b/lib/codegen/src/isa/x86/abi.rs index 0e4a43ee57..5ab9febb29 100644 --- a/lib/codegen/src/isa/x86/abi.rs +++ b/lib/codegen/src/isa/x86/abi.rs @@ -6,7 +6,8 @@ use cursor::{Cursor, CursorPosition, EncCursor}; use ir; use ir::immediates::Imm64; use ir::stackslot::{StackOffset, StackSize}; -use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc}; +use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc, + get_probestack_funcref}; use isa::{RegClass, RegUnit, TargetIsa}; use regalloc::RegisterSet; use result; @@ -216,10 +217,16 @@ pub fn 
prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::Ct } CallConv::Fastcall => unimplemented!("Windows calling conventions"), CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa), + CallConv::Probestack => unimplemented!("probestack calling convention"), } } pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult { + debug_assert!( + !isa.flags().probestack_enabled(), + "baldrdash does not expect cretonne to emit stack probes" + ); + // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes. let stack_align = 16; let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; @@ -239,7 +246,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r // newer versions use a 16-byte aligned stack pointer. let stack_align = 16; let word_size = if isa.flags().is_64bit() { 8 } else { 4 }; - let csr_type = if isa.flags().is_64bit() { + let reg_type = if isa.flags().is_64bit() { ir::types::I64 } else { ir::types::I32 @@ -266,7 +273,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r // Add CSRs to function signature let fp_arg = ir::AbiParam::special_reg( - csr_type, + reg_type, ir::ArgumentPurpose::FramePointer, RU::rbp as RegUnit, ); @@ -274,7 +281,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r func.signature.returns.push(fp_arg); for csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr); + let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); func.signature.params.push(csr_arg); func.signature.returns.push(csr_arg); } @@ -282,11 +289,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r // Set up the cursor and insert the prologue let entry_ebb = func.layout.entry_block().expect("missing entry block"); let mut pos = EncCursor::new(func, 
isa).at_first_insertion_point(entry_ebb); - insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs); + insert_system_v_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa); // Reset the cursor and insert the epilogue let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs); + insert_system_v_epilogues(&mut pos, local_stack_size, reg_type, &csrs); Ok(()) } @@ -295,12 +302,13 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r fn insert_system_v_prologue( pos: &mut EncCursor, stack_size: i64, - csr_type: ir::types::Type, + reg_type: ir::types::Type, csrs: &RegisterSet, + isa: &TargetIsa, ) { // Append param to entry EBB let ebb = pos.current_ebb().expect("missing ebb under cursor"); - let fp = pos.func.dfg.append_ebb_param(ebb, csr_type); + let fp = pos.func.dfg.append_ebb_param(ebb, reg_type); pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.ins().x86_push(fp); @@ -311,7 +319,7 @@ fn insert_system_v_prologue( for reg in csrs.iter(GPR) { // Append param to entry EBB - let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type); + let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type); // Assign it a location pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); @@ -320,8 +328,48 @@ fn insert_system_v_prologue( pos.ins().x86_push(csr_arg); } + // Allocate stack frame storage. if stack_size > 0 { - pos.ins().adjust_sp_imm(Imm64::new(-stack_size)); + if isa.flags().probestack_enabled() && + stack_size > (1 << isa.flags().probestack_size_log2()) + { + // Emit a stack probe. + let rax = RU::rax as RegUnit; + let rax_val = ir::ValueLoc::Reg(rax); + + // The probestack function expects its input in %rax. + let arg = pos.ins().iconst(reg_type, stack_size); + pos.func.locations[arg] = rax_val; + + // Call the probestack function. + let callee = get_probestack_funcref(pos.func, reg_type, rax, isa); + + // Make the call. 
+ let call = if !isa.flags().is_pic() && isa.flags().is_64bit() && + !pos.func.dfg.ext_funcs[callee].colocated + { + // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect. + // Use r11 as it may be clobbered under all supported calling conventions. + let r11 = RU::r11 as RegUnit; + let sig = pos.func.dfg.ext_funcs[callee].signature; + let addr = pos.ins().func_addr(reg_type, callee); + pos.func.locations[addr] = ir::ValueLoc::Reg(r11); + pos.ins().call_indirect(sig, addr, &[arg]) + } else { + // Otherwise just do a normal call. + pos.ins().call(callee, &[arg]) + }; + + // If the probestack function doesn't adjust sp, do it ourselves. + if !isa.flags().probestack_func_adjusts_sp() { + let result = pos.func.dfg.inst_results(call)[0]; + pos.func.locations[result] = rax_val; + pos.ins().adjust_sp_down(result); + } + } else { + // Simply decrement the stack pointer. + pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)); + } } } @@ -329,14 +377,14 @@ fn insert_system_v_prologue( fn insert_system_v_epilogues( pos: &mut EncCursor, stack_size: i64, - csr_type: ir::types::Type, + reg_type: ir::types::Type, csrs: &RegisterSet, ) { while let Some(ebb) = pos.next_ebb() { pos.goto_last_inst(ebb); if let Some(inst) = pos.current_inst() { if pos.func.dfg[inst].opcode().is_return() { - insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs); + insert_system_v_epilogue(inst, stack_size, pos, reg_type, csrs); } } } @@ -347,23 +395,23 @@ fn insert_system_v_epilogue( inst: ir::Inst, stack_size: i64, pos: &mut EncCursor, - csr_type: ir::types::Type, + reg_type: ir::types::Type, csrs: &RegisterSet, ) { if stack_size > 0 { - pos.ins().adjust_sp_imm(Imm64::new(stack_size)); + pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)); } // Pop all the callee-saved registers, stepping backward each time to // preserve the correct order. 
- let fp_ret = pos.ins().x86_pop(csr_type); + let fp_ret = pos.ins().x86_pop(reg_type); pos.prev_inst(); pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit); pos.func.dfg.append_inst_arg(inst, fp_ret); for reg in csrs.iter(GPR) { - let csr_ret = pos.ins().x86_pop(csr_type); + let csr_ret = pos.ins().x86_pop(reg_type); pos.prev_inst(); pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg); diff --git a/lib/codegen/src/legalizer/libcall.rs b/lib/codegen/src/legalizer/libcall.rs index 7d058f24bb..fd7dd17e7e 100644 --- a/lib/codegen/src/legalizer/libcall.rs +++ b/lib/codegen/src/legalizer/libcall.rs @@ -1,7 +1,7 @@ //! Expanding instructions as runtime library calls. use ir; -use ir::InstBuilder; +use ir::{InstBuilder, get_libcall_funcref}; use std::vec::Vec; use isa::TargetIsa; @@ -14,58 +14,14 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIs None => return false, }; - let funcref = - find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func, isa)); - // Now we convert `inst` to a call. First save the arguments. let mut args = Vec::new(); args.extend_from_slice(func.dfg.inst_args(inst)); // The replace builder will preserve the instruction result values. + let funcref = get_libcall_funcref(libcall, func, inst, isa); func.dfg.replace(inst).call(funcref, &args); // TODO: ask the ISA to legalize the signature. true } - -/// Get the existing function reference for `libcall` in `func` if it exists. -fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option { - // We're assuming that all libcall function decls are at the end. - // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless. - for (fref, func_data) in func.dfg.ext_funcs.iter().rev() { - match func_data.name { - ir::ExternalName::LibCall(lc) => { - if lc == libcall { - return Some(fref); - } - } - _ => break, - } - } - None -} - -/// Create a funcref for `libcall` with a signature matching `inst`. 
-fn make_funcref( - libcall: ir::LibCall, - inst: ir::Inst, - func: &mut ir::Function, - isa: &TargetIsa, -) -> ir::FuncRef { - // Start with a fast calling convention. We'll give the ISA a chance to change it. - let mut sig = ir::Signature::new(isa.flags().call_conv()); - for &v in func.dfg.inst_args(inst) { - sig.params.push(ir::AbiParam::new(func.dfg.value_type(v))); - } - for &v in func.dfg.inst_results(inst) { - sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v))); - } - let sigref = func.import_signature(sig); - - // TODO: Can libcalls be colocated in some circumstances? - func.import_function(ir::ExtFuncData { - name: ir::ExternalName::LibCall(libcall), - signature: sigref, - colocated: false, - }) -} diff --git a/lib/codegen/src/settings.rs b/lib/codegen/src/settings.rs index e34ee1421f..dbefb260c5 100644 --- a/lib/codegen/src/settings.rs +++ b/lib/codegen/src/settings.rs @@ -363,6 +363,7 @@ mod tests { is_64bit = false\n\ call_conv = \"fast\"\n\ is_pic = false\n\ + colocated_libcalls = false\n\ return_at_end = false\n\ avoid_div_traps = false\n\ is_compressed = false\n\ @@ -370,7 +371,10 @@ mod tests { enable_simd = true\n\ enable_atomics = true\n\ baldrdash_prologue_words = 0\n\ - allones_funcaddrs = false\n" + allones_funcaddrs = false\n\ + probestack_enabled = true\n\ + probestack_func_adjusts_sp = false\n\ + probestack_size_log2 = 12\n" ); assert_eq!(f.opt_level(), super::OptLevel::Default); assert_eq!(f.enable_simd(), true);