From 22334bcb54bcce89abc2d4161c7c5279504869d6 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <jolesen@mozilla.com>
Date: Tue, 21 Mar 2017 13:25:08 -0700
Subject: [PATCH] Avoid generating value split instructions.

The legalizer often splits values into parts with the vsplit and
isplit instructions. Avoid doing that for values that are already
defined by the corresponding concatenation instructions.

This reduces the number of instructions created during legalization, and
it simplifies later optimizations. A number of dead concatenation
instructions are left behind. They can be trivially cleaned up by a dead
code elimination pass.
---
 filetests/isa/riscv/legalize-i64.cton  |  19 ++--
 lib/cretonne/meta/gen_legalizer.py     |  71 ++++++++++-----
 lib/cretonne/src/legalizer/boundary.rs |   5 +-
 lib/cretonne/src/legalizer/mod.rs      |   1 +
 lib/cretonne/src/legalizer/split.rs    | 120 +++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 35 deletions(-)
 create mode 100644 lib/cretonne/src/legalizer/split.rs

diff --git a/filetests/isa/riscv/legalize-i64.cton b/filetests/isa/riscv/legalize-i64.cton
index 1d446f8a47..dfa78447af 100644
--- a/filetests/isa/riscv/legalize-i64.cton
+++ b/filetests/isa/riscv/legalize-i64.cton
@@ -2,47 +2,46 @@
 test legalizer
 isa riscv supports_m=1
 
-; regex: V=v\d+
-; regex: VX=vx\d+
+; regex: V=vx?\d+
 
 function bitwise_and(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
     v3 = band v1, v2
     return v3
 }
-; check: $(v1l=$V), $(v1h=$VX) = isplit
-; check: $(v2l=$V), $(v2h=$VX) = isplit
+; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32):
 ; check: [R#ec
 ; sameln: $(v3l=$V) = band $v1l, $v2l
 ; check: [R#ec
 ; sameln: $(v3h=$V) = band $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
+; check: return $v3l, $v3h
 
 function bitwise_or(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
     v3 = bor v1, v2
     return v3
 }
-; check: $(v1l=$V), $(v1h=$VX) = isplit
-; check: $(v2l=$V), $(v2h=$VX) = isplit
+; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32):
 ; check: [R#cc
 ; sameln: $(v3l=$V) = bor $v1l, $v2l
 ; check: [R#cc
 ; sameln: $(v3h=$V) = bor $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
+; check: return $v3l, $v3h
 
 function bitwise_xor(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
     v3 = bxor v1, v2
     return v3
 }
-; check: $(v1l=$V), $(v1h=$VX) = isplit
-; check: $(v2l=$V), $(v2h=$VX) = isplit
+; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32):
 ; check: [R#8c
 ; sameln: $(v3l=$V) = bxor $v1l, $v2l
 ; check: [R#8c
 ; sameln: $(v3h=$V) = bxor $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
+; check: return $v3l, $v3h
 
 function arith_add(i64, i64) -> i64 {
 ; Legalizing iadd.i64 requires two steps:
@@ -52,8 +51,7 @@ ebb0(v1: i64, v2: i64):
     v3 = iadd v1, v2
     return v3
 }
-; check: $(v1l=$V), $(v1h=$VX) = isplit
-; check: $(v2l=$V), $(v2h=$VX) = isplit
+; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32):
 ; check: [R#0c
 ; sameln: $(v3l=$V) = iadd $v1l, $v2l
 ; check: $(c=$V) = icmp ult, $v3l, $v1l
@@ -63,3 +61,4 @@ ebb0(v1: i64, v2: i64):
 ; check: [R#0c
 ; sameln: $(v3h=$V) = iadd $v3h1, $c
 ; check: $v3 = iconcat $v3l, $v3h
+; check: return $v3l, $v3h
diff --git a/lib/cretonne/meta/gen_legalizer.py b/lib/cretonne/meta/gen_legalizer.py
index 95d16d3663..10e9641df7 100644
--- a/lib/cretonne/meta/gen_legalizer.py
+++ b/lib/cretonne/meta/gen_legalizer.py
@@ -9,7 +9,7 @@ the input instruction.
 """
 from __future__ import absolute_import
 from srcgen import Formatter
-from base import legalize
+from base import legalize, instructions
 from cdsl.ast import Var
 
 try:
@@ -117,36 +117,61 @@ def wrap_tup(seq):
         return '({})'.format(', '.join(tup))
 
 
+def is_value_split(node):
+    # type: (Def) -> bool
+    """
+    Determine if `node` represents one of the value splitting instructions:
+    `isplit` or `vsplit`. These instructions are lowered specially by the
+    `legalizer::split` module.
+    """
+    if len(node.defs) != 2:
+        return False
+    return node.expr.inst in (instructions.isplit, instructions.vsplit)
+
+
 def emit_dst_inst(node, fmt):
     # type: (Def, Formatter) -> None
     exact_replace = False
     replaced_inst = None  # type: str
     fixup_first_result = False
-    if len(node.defs) == 0:
-        # This node doesn't define any values, so just insert the new
-        # instruction.
-        builder = 'dfg.ins(pos)'
-    else:
-        src_def0 = node.defs[0].src_def
-        if src_def0 and node.defs[0] == src_def0.defs[0]:
-            # The primary result is replacing the primary result of the src
-            # pattern.
-            # Replace the whole instruction.
-            builder = 'let {} = dfg.replace(inst)'.format(wrap_tup(node.defs))
-            replaced_inst = 'inst'
-            # Secondary values weren't replaced if this is an exact replacement
-            # for all the src results.
-            exact_replace = (node.defs == src_def0.defs)
-        else:
-            # Insert a new instruction since its primary def doesn't match the
-            # src.
-            builder = 'let {} = dfg.ins(pos)'.format(wrap_tup(node.defs))
-            fixup_first_result = node.defs[0].is_output()
 
-    fmt.line('{}.{};'.format(builder, node.expr.rust_builder(node.defs)))
+    if is_value_split(node):
+        # Split instructions are not emitted with the builder, but by calling
+        # special functions in the `legalizer::split` module. These functions
+        # will eliminate concat-split patterns.
+        fmt.line(
+                'let {} = split::{}(dfg, pos, {});'
+                .format(
+                    wrap_tup(node.defs),
+                    node.expr.inst.snake_name(),
+                    node.expr.args[0]))
+    else:
+        if len(node.defs) == 0:
+            # This node doesn't define any values, so just insert the new
+            # instruction.
+            builder = 'dfg.ins(pos)'
+        else:
+            src_def0 = node.defs[0].src_def
+            if src_def0 and node.defs[0] == src_def0.defs[0]:
+                # The primary result is replacing the primary result of the
+                # source pattern.
+                # Replace the whole instruction.
+                builder = 'let {} = dfg.replace(inst)'.format(
+                        wrap_tup(node.defs))
+                replaced_inst = 'inst'
+                # Secondary values weren't replaced if this is an exact
+                # replacement for all the source results.
+                exact_replace = (node.defs == src_def0.defs)
+            else:
+                # Insert a new instruction since its primary def doesn't match
+                # the source.
+                builder = 'let {} = dfg.ins(pos)'.format(wrap_tup(node.defs))
+                fixup_first_result = node.defs[0].is_output()
+
+        fmt.line('{}.{};'.format(builder, node.expr.rust_builder(node.defs)))
 
     # If we just replaced an instruction, we need to bump the cursor so
-    # following instructions are inserted *after* the replaced insruction.
+    # following instructions are inserted *after* the replaced instruction.
     if replaced_inst:
         with fmt.indented(
                 'if pos.current_inst() == Some({}) {{'
diff --git a/lib/cretonne/src/legalizer/boundary.rs b/lib/cretonne/src/legalizer/boundary.rs
index 3c98009aed..a53526232a 100644
--- a/lib/cretonne/src/legalizer/boundary.rs
+++ b/lib/cretonne/src/legalizer/boundary.rs
@@ -22,6 +22,7 @@ use ir::{Function, Cursor, DataFlowGraph, Inst, InstBuilder, Ebb, Type, Value, S
          ArgumentType};
 use ir::instructions::CallInfo;
 use isa::TargetIsa;
+use legalizer::split::{isplit, vsplit};
 
 /// Legalize all the function signatures in `func`.
 ///
@@ -271,12 +272,12 @@ fn convert_to_abi<PutArg>(dfg: &mut DataFlowGraph,
     let ty = dfg.value_type(value);
     match legalize_abi_value(ty, &arg_type) {
         ValueConversion::IntSplit => {
-            let (lo, hi) = dfg.ins(pos).isplit(value);
+            let (lo, hi) = isplit(dfg, pos, value);
             convert_to_abi(dfg, pos, lo, put_arg);
             convert_to_abi(dfg, pos, hi, put_arg);
         }
         ValueConversion::VectorSplit => {
-            let (lo, hi) = dfg.ins(pos).vsplit(value);
+            let (lo, hi) = vsplit(dfg, pos, value);
             convert_to_abi(dfg, pos, lo, put_arg);
             convert_to_abi(dfg, pos, hi, put_arg);
         }
diff --git a/lib/cretonne/src/legalizer/mod.rs b/lib/cretonne/src/legalizer/mod.rs
index a315cf069f..56e3e10dd6 100644
--- a/lib/cretonne/src/legalizer/mod.rs
+++ b/lib/cretonne/src/legalizer/mod.rs
@@ -18,6 +18,7 @@ use ir::condcodes::IntCC;
 use isa::{TargetIsa, Legalize};
 
 mod boundary;
+mod split;
 
 /// Legalize `func` for `isa`.
 ///
diff --git a/lib/cretonne/src/legalizer/split.rs b/lib/cretonne/src/legalizer/split.rs
new file mode 100644
index 0000000000..b8323a59fb
--- /dev/null
+++ b/lib/cretonne/src/legalizer/split.rs
@@ -0,0 +1,120 @@
+//! Value splitting.
+//!
+//! Some value types are too large to fit in registers, so they need to be split into smaller parts
+//! that the ISA can operate on. There are two dimensions of splitting, represented by two
+//! complementary instruction pairs:
+//!
+//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
+//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
+//!   lane types.
+//!
+//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
+//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
+//! This breakdown is handled by the ABI lowering.
+//!
+//! When legalizing a single instruction, it is wrapped in splits and concatenations:
+//!
+//! ```cton
+//!     v1 = bxor.i64 v2, v3
+//! ```
+//!
+//! becomes:
+//!
+//! ```cton
+//!     v20, v21 = isplit v2
+//!     v30, v31 = isplit v3
+//!     v10 = bxor.i32 v20, v30
+//!     v11 = bxor.i32 v21, v31
+//!     v1 = iconcat v10, v11
+//! ```
+//!
+//! This local expansion approach still leaves the original `i64` values in the code as operands on
+//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
+//! values are constantly split and concatenated.
+//!
+//! # Optimized splitting
+//!
+//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
+//! first check if the value is defined by the corresponding concatenation. If so, then just use
+//! the two concatenation inputs directly:
+//!
+//! ```cton
+//!     v4 = iadd_imm.i64 v1, 1
+//! ```
+//!
+//! becomes, using the expanded code from above:
+//!
+//! ```cton
+//!     v40, v5 = iadd_imm_cout.i32 v10, 1
+//!     v6 = bint.i32 v5
+//!     v41 = iadd.i32 v11, v6
+//!     v4 = iconcat v40, v41
+//! ```
+//!
+//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
+//! can be trivially deleted by a dead code elimination pass.
+//!
+//! # EBB arguments
+//!
+//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
+//! up with no `i64` values anywhere, except for EBB arguments. We can work around this by
+//! iteratively splitting EBB arguments too. That should leave us with no illegal value types
+//! anywhere.
+//!
+//! It is possible to have circular dependencies of EBB arguments that are never used by any real
+//! instructions. These loops will remain in the program.
+
+use ir::{DataFlowGraph, Cursor, Value, Opcode, ValueDef, InstructionData, InstBuilder};
+
+/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
+/// if possible.
+pub fn isplit(dfg: &mut DataFlowGraph, pos: &mut Cursor, value: Value) -> (Value, Value) {
+    split_value(dfg, pos, value, Opcode::Iconcat)
+}
+
+/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
+/// possible.
+pub fn vsplit(dfg: &mut DataFlowGraph, pos: &mut Cursor, value: Value) -> (Value, Value) {
+    split_value(dfg, pos, value, Opcode::Vconcat)
+}
+
+/// Split a single value using the integer or vector semantics given by the `concat` opcode.
+///
+/// If the value is defined by a `concat` instruction, just reuse the operand values of that
+/// instruction.
+///
+/// Return the two new values representing the parts of `value`.
+fn split_value(dfg: &mut DataFlowGraph,
+               pos: &mut Cursor,
+               value: Value,
+               concat: Opcode)
+               -> (Value, Value) {
+    let value = dfg.resolve_copies(value);
+    let mut reuse = None;
+
+    match dfg.value_def(value) {
+        ValueDef::Res(inst, num) => {
+            // This is an instruction result. See if the value was created by a `concat`
+            // instruction.
+            if let InstructionData::Binary { opcode, args, .. } = dfg[inst] {
+                assert_eq!(num, 0);
+                if opcode == concat {
+                    reuse = Some((args[0], args[1]));
+                }
+            }
+        }
+        ValueDef::Arg(_ebb, _num) => {}
+    }
+
+    // Did the code above succeed in finding values we can reuse?
+    if let Some(pair) = reuse {
+        pair
+    } else {
+        // No, we'll just have to insert the requested split instruction at `pos`.
+        match concat {
+            Opcode::Iconcat => dfg.ins(pos).isplit(value),
+            Opcode::Vconcat => dfg.ins(pos).vsplit(value),
+            _ => panic!("Unhandled concat opcode: {}", concat),
+        }
+    }
+}