Rework x64 addressing-mode lowering to be slightly more flexible. (#4080)
This PR refactors the x64 backend's address-mode lowering to use an incremental-build approach: it visits each node in the tree of `iadd`s feeding a load/store address and, at each step, folds that node into the best possible `Amode`. It can combine an arbitrary number of constant offsets (an extension beyond the current rules) and can capture a left-shifted (scaled) index at any position in the tree (another extension).

Unfortunately, this yields no measurable performance improvement on our Wasm benchmarks in Sightglass, because the IR lowered from wasm32 performs address computation in 32 bits and then `uextend`s the result before adding it to the 64-bit heap base. We can't simply lift the 32-bit adds to 64 bits, because that would lose the 32-bit wraparound semantics. (We could label adds as "expected not to overflow" and allow *those* to be lifted to 64-bit operations; wasm32 heap address computation should fit this. This is `add nuw` (no unsigned wrap) in LLVM IR terms. That's likely my next step.)

Nevertheless, (i) this generalizes the cases we can handle, which should be a good thing all other things being equal (and no compile-time impact was measured here); and (ii) it might benefit non-Wasm frontends.
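As a rough illustration of the incremental-build idea, here is a minimal Rust sketch that walks a toy expression tree and folds constants, one scaled index, and base/index registers into a single addressing mode. The types and names (`Expr`, `Amode`, `lower_amode`) are invented for this sketch and are not the actual Cranelift implementation, which also has to keep the final offset within a 32-bit displacement and materialize leftover terms with explicit adds.

```rust
// Minimal sketch of incremental address-mode building. `Expr`, `Amode`, and
// `lower_amode` are invented for illustration; they are not Cranelift's types.

/// A toy address-expression tree: registers, constants, shifts, and adds.
enum Expr {
    Reg(u8),                   // a value already in a (virtual) register
    Const(i64),                // an `iconst`
    Shl(Box<Expr>, u8),        // left shift by a constant amount (scaled index)
    Add(Box<Expr>, Box<Expr>), // an `iadd`
}

/// Simplified x64 addressing mode: base + index * scale + offset.
/// (A real amode also requires the final offset to fit in a 32-bit displacement.)
#[derive(Default, Debug)]
struct Amode {
    base: Option<u8>,
    index: Option<(u8, u8)>, // (register, shift amount); scale = 1 << shift
    offset: i64,
}

/// Visit each node of the `iadd` tree and fold it into the best `Amode` we
/// can: constants are summed into the displacement, one shifted register can
/// become the scaled index, and up to two plain registers become base/index.
fn lower_amode(expr: &Expr, amode: &mut Amode) {
    match expr {
        Expr::Add(a, b) => {
            lower_amode(a, amode);
            lower_amode(b, amode);
        }
        Expr::Const(c) => amode.offset = amode.offset.wrapping_add(*c),
        Expr::Shl(inner, shift) if *shift <= 3 && amode.index.is_none() => {
            match &**inner {
                Expr::Reg(r) => amode.index = Some((*r, *shift)),
                // A real lowering would compute the shifted subtree into a
                // temporary register and still use it as the index.
                _ => unimplemented!("materialize shifted subexpression"),
            }
        }
        Expr::Reg(r) if amode.base.is_none() => amode.base = Some(*r),
        Expr::Reg(r) if amode.index.is_none() => amode.index = Some((*r, 0)),
        // Anything that no longer fits would be folded into a temporary
        // register with an explicit `add` in a real lowering.
        _ => unimplemented!("fold leftover term with an explicit add"),
    }
}

fn main() {
    // (v0 + (-1)) + (v1 << 3), as in %amode_reg_reg_imm_scaled below.
    let expr = Expr::Add(
        Box::new(Expr::Add(Box::new(Expr::Reg(0)), Box::new(Expr::Const(-1)))),
        Box::new(Expr::Shl(Box::new(Expr::Reg(1)), 3)),
    );
    let mut amode = Amode::default();
    lower_amode(&expr, &mut amode);
    // base = Some(0), index = Some((1, 3)), offset = -1
    println!("{:?}", amode);
}
```

Applied to the address tree in `%amode_reg_reg_imm_scaled` below, this is the kind of folding that lets the whole computation collapse into a single `movq -1(%rdi,%rsi,8), %rax`.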
@@ -65,3 +65,97 @@ block0(v0: i64):
; popq %rbp
; ret

function %amode_reg_reg_imm(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = iadd v0, v1
    v3 = iconst.i64 256
    v4 = iadd v2, v3
    v5 = load.i64 v4+64
    return v5
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 320(%rdi,%rsi,1), %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %amode_reg_reg_imm_negative(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = iadd v0, v1
    v3 = iconst.i64 -1
    v4 = iadd v2, v3
    v5 = load.i64 v4
    return v5
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq -1(%rdi,%rsi,1), %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %amode_reg_reg_imm_scaled(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = iconst.i64 -1
    v3 = iadd v0, v2
    v4 = ishl_imm v1, 3
    v5 = iadd v3, v4
    v6 = load.i64 v5
    return v6
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq -1(%rdi,%rsi,8), %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %amode_reg_reg_imm_uext_scaled(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
    v2 = iconst.i64 -1
    v3 = iadd v0, v2
    v4 = ishl_imm v1, 3
    v5 = uextend.i64 v4
    v6 = iadd v3, v5
    v7 = load.i64 v6
    return v7
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %esi, %r8d
; movq -1(%rdi,%r8,8), %rax
; movq %rbp, %rsp
; popq %rbp
; ret

function %amode_reg_reg_imm_uext_scaled_add(i64, i32, i32) -> i64 {
block0(v0: i64, v1: i32, v2: i32):
    v3 = iconst.i64 -1
    v4 = iadd v0, v3
    v5 = iadd v1, v2
    v6 = ishl_imm v5, 2
    v7 = uextend.i64 v6
    v8 = iadd v4, v7
    v9 = load.i64 v8
    return v9
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; addl %esi, %edx, %esi
; movq -1(%rdi,%rsi,4), %rax
; movq %rbp, %rsp
; popq %rbp
; ret