load_complex and store_complex instructions (#309)

* Start adding the load_complex and store_complex instructions. N.b.: The text format is not correct yet. Requires changes to the lexer and parser. I'm not sure why I needed to change the RuntimeError to Exception yet. Will fix.
* Get first few encodings of load_complex working. Still needs var args type checking.
* Clean up ModRM helper functions in binemit.
* Implement 32-bit displacement for load_complex
* Use encoding helpers instead of doing them all by hand
* Initial implementation of store_complex
* Parse value list for load/store_complex with + as delimiter. Looks nice.
* Add sign/zero-extension and size variants for load_complex.
* Add size variants of store_complex.
* Add asm helper lines to load/store complex bin tests.
* Example of length-checking the instruction ValueList for an encoding. Extremely questionable implementation.
* Fix Python linting issues
* First draft of postopt pass to fold adds and loads into load_complex. Just simple loads for now.
* Optimization pass now works with all types of loads.
* Add store+add -> store_complex to postopt pass
* Put complex address optimization behind ISA flag.
* Add load/store complex for f32 and f64
* Fixes changes to lexer that broke NaN parsing. Abstracts away the repeated checks for whether or not the characters following a + or - are going to be parsed as a number or not.
* Fix formatting issues
* Fix register restrictions for complex addresses.
* Encoding tests for x86-32.
* Add documentation for newly added instructions, recipes, and cdsl changes.
* Fix python formatting again
* Apply value-list length predicates to all LoadComplex and StoreComplex instructions.
* Add predicate types to new encoding helpers for mypy.
* Import FieldPredicate to satisfy mypy.
* Add and fix some "asm" strings in the encoding tests.
* Line-up 'bin' comments in x86/binary64 test
* Test parsing of offset-less store_complex instruction.
* 'sNaN' not 'sNan'
* Bounds check the lookup for polymorphic typevar operand.
* Fix encodings for istore16_complex.
2018-05-09 12:07:00 -07:00
parent 5aa84a744b
commit f636d795c5
25 changed files with 1127 additions and 21 deletions
--- a/lib/codegen/meta/base/formats.py
+++ b/lib/codegen/meta/base/formats.py
@@ -57,7 +57,9 @@ CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS)
 FuncAddr = InstructionFormat(func_ref)

 Load = InstructionFormat(memflags, VALUE, offset32)
+LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32)
 Store = InstructionFormat(memflags, VALUE, VALUE, offset32)
+StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32)

 StackLoad = InstructionFormat(stack_slot, offset32)
 StackStore = InstructionFormat(VALUE, stack_slot, offset32)
--- a/lib/codegen/meta/base/instructions.py
+++ b/lib/codegen/meta/base/instructions.py
@@ -246,6 +246,7 @@ x = Operand('x', Mem, doc='Value to be stored')
 a = Operand('a', Mem, doc='Value loaded')
 p = Operand('p', iAddr)
 Flags = Operand('Flags', memflags)
+args = Operand('args', VARIABLE_ARGS, doc='Address arguments')

 load = Instruction(
        'load', r"""
@@ -256,6 +257,15 @@ load = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+load_complex = Instruction(
+        'load_complex', r"""
+        Load from memory at ``sum(args) + Offset``.
+
+        This is a polymorphic instruction that can load any value type which
+        has a memory representation.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 store = Instruction(
        'store', r"""
        Store ``x`` to memory at ``p + Offset``.
@@ -265,6 +275,16 @@ store = Instruction(
        """,
        ins=(Flags, x, p, Offset), can_store=True)

+store_complex = Instruction(
+        'store_complex', r"""
+        Store ``x`` to memory at ``sum(args) + Offset``.
+
+        This is a polymorphic instruction that can store any value type with a
+        memory representation.
+        """,
+        ins=(Flags, x, args, Offset), can_store=True)
+
+
 iExt8 = TypeVar(
        'iExt8', 'An integer type with more than 8 bits',
        ints=(16, 64))
@@ -279,6 +299,14 @@ uload8 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+uload8_complex = Instruction(
+        'uload8_complex', r"""
+        Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+        This is equivalent to ``load.i8`` followed by ``uextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 sload8 = Instruction(
        'sload8', r"""
        Load 8 bits from memory at ``p + Offset`` and sign-extend.
@@ -287,6 +315,14 @@ sload8 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+sload8_complex = Instruction(
+        'sload8_complex', r"""
+        Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+        This is equivalent to ``load.i8`` followed by ``sextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 istore8 = Instruction(
        'istore8', r"""
        Store the low 8 bits of ``x`` to memory at ``p + Offset``.
@@ -295,6 +331,14 @@ istore8 = Instruction(
        """,
        ins=(Flags, x, p, Offset), can_store=True)

+istore8_complex = Instruction(
+        'istore8_complex', r"""
+        Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+        This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
+        """,
+        ins=(Flags, x, args, Offset), can_store=True)
+
 iExt16 = TypeVar(
        'iExt16', 'An integer type with more than 16 bits',
        ints=(32, 64))
@@ -309,6 +353,14 @@ uload16 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+uload16_complex = Instruction(
+        'uload16_complex', r"""
+        Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+        This is equivalent to ``load.i16`` followed by ``uextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 sload16 = Instruction(
        'sload16', r"""
        Load 16 bits from memory at ``p + Offset`` and sign-extend.
@@ -317,6 +369,14 @@ sload16 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+sload16_complex = Instruction(
+        'sload16_complex', r"""
+        Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+        This is equivalent to ``load.i16`` followed by ``sextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 istore16 = Instruction(
        'istore16', r"""
        Store the low 16 bits of ``x`` to memory at ``p + Offset``.
@@ -325,6 +385,14 @@ istore16 = Instruction(
        """,
        ins=(Flags, x, p, Offset), can_store=True)

+istore16_complex = Instruction(
+        'istore16_complex', r"""
+        Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+        This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
+        """,
+        ins=(Flags, x, args, Offset), can_store=True)
+
 iExt32 = TypeVar(
        'iExt32', 'An integer type with more than 32 bits',
        ints=(64, 64))
@@ -339,6 +407,14 @@ uload32 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+uload32_complex = Instruction(
+        'uload32_complex', r"""
+        Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+        This is equivalent to ``load.i32`` followed by ``uextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 sload32 = Instruction(
        'sload32', r"""
        Load 32 bits from memory at ``p + Offset`` and sign-extend.
@@ -347,6 +423,14 @@ sload32 = Instruction(
        """,
        ins=(Flags, p, Offset), outs=a, can_load=True)

+sload32_complex = Instruction(
+        'sload32_complex', r"""
+        Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+        This is equivalent to ``load.i32`` followed by ``sextend``.
+        """,
+        ins=(Flags, args, Offset), outs=a, can_load=True)
+
 istore32 = Instruction(
        'istore32', r"""
        Store the low 32 bits of ``x`` to memory at ``p + Offset``.
@@ -355,6 +439,14 @@ istore32 = Instruction(
        """,
        ins=(Flags, x, p, Offset), can_store=True)

+istore32_complex = Instruction(
+        'istore32_complex', r"""
+        Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+        This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
+        """,
+        ins=(Flags, x, args, Offset), can_store=True)
+
 x = Operand('x', Mem, doc='Value to be stored')
 a = Operand('a', Mem, doc='Value loaded')
 Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot')
--- a/lib/codegen/meta/base/predicates.py
+++ b/lib/codegen/meta/base/predicates.py
@@ -2,12 +2,12 @@
 Cretonne predicates that consider `Function` fields.
 """
 from cdsl.predicates import FieldPredicate
-from .formats import UnaryGlobalVar
+from .formats import UnaryGlobalVar, InstructionFormat

 try:
    from typing import TYPE_CHECKING
    if TYPE_CHECKING:
-        from cdsl.formats import FormatField  # noqa
+        from cdsl.formats import InstructionFormat, FormatField  # noqa
 except ImportError:
    pass

@@ -33,3 +33,10 @@ class IsColocatedData(FieldPredicate):
        # type: () -> None
        super(IsColocatedData, self).__init__(
            UnaryGlobalVar.global_var, 'is_colocated_data', ('func',))
+
+
+class LengthEquals(FieldPredicate):
+    def __init__(self, iform, num):
+        # type: (InstructionFormat, int) -> None
+        super(LengthEquals, self).__init__(
+            iform.args(), 'has_length_of', (num, 'func'))
--- a/lib/codegen/meta/cdsl/formats.py
+++ b/lib/codegen/meta/cdsl/formats.py
@@ -103,6 +103,19 @@ class InstructionFormat(object):
        InstructionFormat._registry[sig] = self
        InstructionFormat.all_formats.append(self)

+    def args(self):
+        # type: () -> FormatField
+        """
+        Provides a ValueListField, which is derived from FormatField,
+        corresponding to the full ValueList of the instruction format, or
+        `None` if the format has no value list. This is useful for creating
+        predicates for instructions which use variadic arguments.
+        """
+
+        if self.has_value_list:
+            return ValueListField(self)
+        return None
+
    def _process_member_names(self, kinds):
        # type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField]  # noqa
        """
@@ -210,7 +223,7 @@ class FormatField(object):
    This corresponds to a single member of a variant of the `InstructionData`
    data type.

-    :param iformat: Parent `InstructionFormat`.
+    :param iform: Parent `InstructionFormat`.
    :param immnum: Immediate operand number in parent.
    :param kind: Immediate Operand kind.
    :param member: Member name in `InstructionData` variant.
@@ -227,6 +240,29 @@ class FormatField(object):
        # type: () -> str
        return '{}.{}'.format(self.format.name, self.member)

+    def rust_destructuring_name(self):
+        # type: () -> str
+        return self.member
+
    def rust_name(self):
        # type: () -> str
        return self.member
+
+
+class ValueListField(FormatField):
+    """
+    The full value list field of an instruction format.
+
+    This corresponds to all Value-type members of a variant of the
+    `InstructionData` format, which contains a ValueList.
+
+    :param iform: Parent `InstructionFormat`.
+    """
+    def __init__(self, iform):
+        # type: (InstructionFormat) -> None
+        self.format = iform
+        self.member = "args"
+
+    def rust_destructuring_name(self):
+        # type: () -> str
+        return 'ref {}'.format(self.member)
--- a/lib/codegen/meta/cdsl/instructions.py
+++ b/lib/codegen/meta/cdsl/instructions.py
@@ -201,9 +201,10 @@ class Instruction(object):
        # Prefer to use the typevar_operand to infer the controlling typevar.
        self.use_typevar_operand = False
        typevar_error = None
-        if self.format.typevar_operand is not None:
+        tv_op = self.format.typevar_operand
+        if tv_op is not None and tv_op < len(self.value_opnums):
            try:
-                opnum = self.value_opnums[self.format.typevar_operand]
+                opnum = self.value_opnums[tv_op]
                tv = self.ins[opnum].typevar
                if tv is tv.free_typevar() or tv.singleton_type() is not None:
                    self.other_typevars = self._verify_ctrl_typevar(tv)
--- a/lib/codegen/meta/gen_binemit.py
+++ b/lib/codegen/meta/gen_binemit.py
@@ -27,7 +27,7 @@ def gen_recipe(recipe, fmt):
    nvops = iform.num_value_operands
    want_args = any(isinstance(i, RegClass) or isinstance(i, Stack)
                    for i in recipe.ins)
-    assert not want_args or nvops > 0
+    assert not want_args or nvops > 0 or iform.has_value_list
    want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack)
                    for o in recipe.outs)

--- a/lib/codegen/meta/gen_encoding.py
+++ b/lib/codegen/meta/gen_encoding.py
@@ -103,7 +103,7 @@ def emit_instp(instp, fmt, has_func=False):
    fnames = set()  # type: Set[str]
    for p in leafs:
        if isinstance(p, FieldPredicate):
-            fnames.add(p.field.rust_name())
+            fnames.add(p.field.rust_destructuring_name())
        else:
            assert isinstance(p, TypePredicate)
            has_type_check = True
--- a/lib/codegen/meta/isa/x86/encodings.py
+++ b/lib/codegen/meta/isa/x86/encodings.py
@@ -3,9 +3,9 @@ x86 Encodings.
 """
 from __future__ import absolute_import
 from cdsl.predicates import IsUnsignedInt, Not, And
-from base.predicates import IsColocatedFunc, IsColocatedData
+from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
 from base import instructions as base
-from base.formats import UnaryImm, FuncAddr, Call
+from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
 from .defs import X86_64, X86_32
 from . import recipes as r
 from . import settings as cfg
@@ -19,6 +19,7 @@ try:
    from typing import TYPE_CHECKING, Any  # noqa
    if TYPE_CHECKING:
        from cdsl.instructions import MaybeBoundInst  # noqa
+        from cdsl.predicates import FieldPredicate # noqa
 except ImportError:
    pass

@@ -54,6 +55,15 @@ def enc_x86_64(inst, recipe, *args, **kwargs):
    X86_64.enc(inst, *recipe(*args, **kwargs))


+def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
+    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
+    """
+    Add encodings for `inst` to X86_64 with and without a REX prefix.
+    """
+    X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp)
+    X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp)
+
+
 def enc_both(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
    """
@@ -63,6 +73,15 @@ def enc_both(inst, recipe, *args, **kwargs):
    enc_x86_64(inst, recipe, *args, **kwargs)


+def enc_both_instp(inst, recipe, instp, *args, **kwargs):
+    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
+    """
+    Add encodings for `inst` to both X86_32 and X86_64.
+    """
+    X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp)
+    enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
+
+
 def enc_i32_i64(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
@@ -80,6 +99,25 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))


+def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
+    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
+    """
+    Add encodings for `inst.i32` to X86_32.
+    Add encodings for `inst.i32` to X86_64 with and without REX.
+    Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+
+    Similar to `enc_i32_i64` but applies `instp` to each encoding.
+    """
+    X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
+
+    # REX-less encoding must come after REX encoding so we don't use it by
+    # default. Otherwise reg-alloc would never use r8 and up.
+    X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp)
+    X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
+
+    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
+
+
 def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
    """
@@ -212,6 +250,31 @@ X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
 #
 # Loads and stores.
 #
+
+ldcomplexp = LengthEquals(LoadComplex, 2)
+for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
+    enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
+    enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
+    X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
+               instp=ldcomplexp)
+    enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
+    enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
+    enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
+    enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
+
+stcomplexp = LengthEquals(StoreComplex, 3)
+for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
+    enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
+    enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
+    enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
+    enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
+
+for recipe in [r.stWithIndex_abcd,
+               r.stWithIndexDisp8_abcd,
+               r.stWithIndexDisp32_abcd]:
+    enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
+    enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
+
 for recipe in [r.st, r.stDisp8, r.stDisp32]:
    enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
    enc_x86_64(base.istore32.i64.any, recipe, 0x89)
@@ -286,18 +349,34 @@ enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
 enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
 enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)

+enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
+enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
+enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
+
 enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
 enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
 enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)

+enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
+enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
+enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
+
 enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
 enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
 enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)

+enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
+enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
+enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
+
 enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
 enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
 enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)

+enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
+enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
+enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
+
 enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
 enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
 enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
--- a/lib/codegen/meta/isa/x86/recipes.py
+++ b/lib/codegen/meta/isa/x86/recipes.py
@@ -14,6 +14,7 @@ from base.formats import IntSelect, IntCondTrap, FloatCondTrap
 from base.formats import Jump, Branch, BranchInt, BranchFloat
 from base.formats import Ternary, FuncAddr, UnaryGlobalVar
 from base.formats import RegMove, RegSpill, RegFill, CopySpecial
+from base.formats import LoadComplex, StoreComplex
 from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE
 from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG
 from .registers import StackGPR32, StackFPR32
@@ -739,6 +740,22 @@ st = TailRecipe(
        modrm_rm(in_reg1, in_reg0, sink);
        ''')

+# XX /r register-indirect store with index and no offset.
+stWithIndex = TailRecipe(
+    'stWithIndex', StoreComplex, size=2,
+    ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsEqual(StoreComplex.offset, 0),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    ''')
+
 # XX /r register-indirect store with no offset.
 # Only ABCD allowed for stored value. This is for byte stores with no REX.
 st_abcd = TailRecipe(
@@ -754,6 +771,23 @@ st_abcd = TailRecipe(
        modrm_rm(in_reg1, in_reg0, sink);
        ''')

+# XX /r register-indirect store with index and no offset.
+# Only ABCD allowed for stored value. This is for byte stores with no REX.
+stWithIndex_abcd = TailRecipe(
+    'stWithIndex_abcd', StoreComplex, size=2,
+    ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsEqual(StoreComplex.offset, 0),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    ''')
+
 # XX /r register-indirect store of FPR with no offset.
 fst = TailRecipe(
        'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
@@ -766,6 +800,20 @@ fst = TailRecipe(
        PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
        modrm_rm(in_reg1, in_reg0, sink);
        ''')
+# XX /r register-indirect store with index and no offset of FPR.
+fstWithIndex = TailRecipe(
+        'fstWithIndex', StoreComplex, size=2,
+        ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(),
+        instp=IsEqual(StoreComplex.offset, 0),
+        clobbers_flags=False,
+        emit='''
+        if !flags.notrap() {
+            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+        }
+        PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+        modrm_sib(in_reg0, sink);
+        sib(0, in_reg2, in_reg1, sink);
+        ''')

 # XX /r register-indirect store with 8-bit offset.
 stDisp8 = TailRecipe(
@@ -781,6 +829,27 @@ stDisp8 = TailRecipe(
        let offset: i32 = offset.into();
        sink.put1(offset as u8);
        ''')
+
+# XX /r register-indirect store with index and 8-bit offset.
+stWithIndexDisp8 = TailRecipe(
+    'stWithIndexDisp8', StoreComplex, size=3,
+    ins=(GPR, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 8),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp8(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put1(offset as u8);
+    ''')
+
+# XX /r register-indirect store with 8-bit offset.
+# Only ABCD allowed for stored value. This is for byte stores with no REX.
 stDisp8_abcd = TailRecipe(
        'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(),
        instp=IsSignedInt(Store.offset, 8),
@@ -795,6 +864,27 @@ stDisp8_abcd = TailRecipe(
        let offset: i32 = offset.into();
        sink.put1(offset as u8);
        ''')
+
+# XX /r register-indirect store with index and 8-bit offset.
+# Only ABCD allowed for stored value. This is for byte stores with no REX.
+stWithIndexDisp8_abcd = TailRecipe(
+    'stWithIndexDisp8_abcd', StoreComplex, size=3,
+    ins=(ABCD, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 8),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp8(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put1(offset as u8);
+    ''')
+
+# XX /r register-indirect store with 8-bit offset of FPR.
 fstDisp8 = TailRecipe(
        'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(),
        instp=IsSignedInt(Store.offset, 8),
@@ -809,6 +899,24 @@ fstDisp8 = TailRecipe(
        sink.put1(offset as u8);
        ''')

+# XX /r register-indirect store with index and 8-bit offset of FPR.
+fstWithIndexDisp8 = TailRecipe(
+    'fstWithIndexDisp8', StoreComplex, size=3,
+    ins=(FPR, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 8),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp8(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put1(offset as u8);
+    ''')
+
 # XX /r register-indirect store with 32-bit offset.
 stDisp32 = TailRecipe(
        'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
@@ -822,6 +930,27 @@ stDisp32 = TailRecipe(
        let offset: i32 = offset.into();
        sink.put4(offset as u32);
        ''')
+
+# XX /r register-indirect store with index and 32-bit offset.
+stWithIndexDisp32 = TailRecipe(
+    'stWithIndexDisp32', StoreComplex, size=6,
+    ins=(GPR, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 32),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp32(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put4(offset as u32);
+    ''')
+
+# XX /r register-indirect store with 32-bit offset.
+# Only ABCD allowed for stored value. This is for byte stores with no REX.
 stDisp32_abcd = TailRecipe(
        'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(),
        when_prefixed=stDisp32,
@@ -835,6 +964,27 @@ stDisp32_abcd = TailRecipe(
        let offset: i32 = offset.into();
        sink.put4(offset as u32);
        ''')
+
+# XX /r register-indirect store with index and 32-bit offset.
+# Only ABCD allowed for stored value. This is for byte stores with no REX.
+stWithIndexDisp32_abcd = TailRecipe(
+    'stWithIndexDisp32_abcd', StoreComplex, size=6,
+    ins=(ABCD, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 32),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp32(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put4(offset as u32);
+    ''')
+
+# XX /r register-indirect store with 32-bit offset of FPR.
 fstDisp32 = TailRecipe(
        'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
        clobbers_flags=False,
@@ -848,6 +998,24 @@ fstDisp32 = TailRecipe(
        sink.put4(offset as u32);
        ''')

+# XX /r register-indirect store with index and 32-bit offset of FPR.
+fstWithIndexDisp32 = TailRecipe(
+    'fstWithIndexDisp32', StoreComplex, size=6,
+    ins=(FPR, GPR, GPR_DEREF_SAFE),
+    outs=(),
+    instp=IsSignedInt(StoreComplex.offset, 32),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+    modrm_sib_disp32(in_reg0, sink);
+    sib(0, in_reg2, in_reg1, sink);
+    let offset: i32 = offset.into();
+    sink.put4(offset as u32);
+    ''')
+
 # Unary spill with SIB and 32-bit displacement.
 spillSib32 = TailRecipe(
        'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
@@ -919,6 +1087,22 @@ ld = TailRecipe(
        modrm_rm(in_reg0, out_reg0, sink);
        ''')

+# XX /r load with index and no offset.
+ldWithIndex = TailRecipe(
+    'ldWithIndex', LoadComplex, size=2,
+    ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
+    outs=(GPR),
+    instp=IsEqual(LoadComplex.offset, 0),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    ''')
+
 # XX /r float load with no offset.
 fld = TailRecipe(
        'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR),
@@ -932,6 +1116,22 @@ fld = TailRecipe(
        modrm_rm(in_reg0, out_reg0, sink);
        ''')

+# XX /r float load with index and no offset.
+fldWithIndex = TailRecipe(
+    'fldWithIndex', LoadComplex, size=2,
+    ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
+    outs=(FPR),
+    instp=IsEqual(LoadComplex.offset, 0),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    ''')
+
 # XX /r load with 8-bit offset.
 ldDisp8 = TailRecipe(
        'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -947,6 +1147,24 @@ ldDisp8 = TailRecipe(
        sink.put1(offset as u8);
        ''')

+# XX /r load with index and 8-bit offset.
+ldWithIndexDisp8 = TailRecipe(
+    'ldWithIndexDisp8', LoadComplex, size=3,
+    ins=(GPR, GPR_DEREF_SAFE),
+    outs=(GPR),
+    instp=IsSignedInt(LoadComplex.offset, 8),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib_disp8(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    let offset: i32 = offset.into();
+    sink.put1(offset as u8);
+    ''')
+
 # XX /r float load with 8-bit offset.
 fldDisp8 = TailRecipe(
        'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -962,6 +1180,24 @@ fldDisp8 = TailRecipe(
        sink.put1(offset as u8);
        ''')

+# XX /r float load with index and 8-bit offset.
+fldWithIndexDisp8 = TailRecipe(
+    'fldWithIndexDisp8', LoadComplex, size=3,
+    ins=(GPR, GPR_DEREF_SAFE),
+    outs=(FPR),
+    instp=IsSignedInt(LoadComplex.offset, 8),
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib_disp8(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    let offset: i32 = offset.into();
+    sink.put1(offset as u8);
+    ''')
+
 # XX /r load with 32-bit offset.
 ldDisp32 = TailRecipe(
        'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -977,6 +1213,24 @@ ldDisp32 = TailRecipe(
        sink.put4(offset as u32);
        ''')

+# XX /r load with index and 32-bit offset.
+ldWithIndexDisp32 = TailRecipe(
+    'ldWithIndexDisp32', LoadComplex, size=6,  # ModR/M + SIB + disp32
+    ins=(GPR, GPR_DEREF_SAFE),  # in_reg0: SIB base, in_reg1: SIB index
+    outs=(GPR),
+    instp=IsSignedInt(LoadComplex.offset, 32),  # offset fits in disp32
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib_disp32(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    let offset: i32 = offset.into();
+    sink.put4(offset as u32);
+    ''')
+
 # XX /r float load with 32-bit offset.
 fldDisp32 = TailRecipe(
        'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -992,6 +1246,24 @@ fldDisp32 = TailRecipe(
        sink.put4(offset as u32);
        ''')

+# XX /r float load with index and 32-bit offset.
+fldWithIndexDisp32 = TailRecipe(
+    'fldWithIndexDisp32', LoadComplex, size=6,  # ModR/M + SIB + disp32
+    ins=(GPR, GPR_DEREF_SAFE),  # in_reg0: SIB base, in_reg1: SIB index
+    outs=(FPR),
+    instp=IsSignedInt(LoadComplex.offset, 32),  # offset fits in disp32
+    clobbers_flags=False,
+    emit='''
+    if !flags.notrap() {
+        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+    }
+    PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+    modrm_sib_disp32(out_reg0, sink);
+    sib(0, in_reg1, in_reg0, sink);
+    let offset: i32 = offset.into();
+    sink.put4(offset as u32);
+    ''')
+
 # Unary fill with SIB and 32-bit displacement.
 fillSib32 = TailRecipe(
        'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
--- a/lib/codegen/src/isa/mod.rs
+++ b/lib/codegen/src/isa/mod.rs
@@ -162,6 +162,11 @@ pub trait TargetIsa: fmt::Display {
        false
    }

+    /// Does the CPU implement multi-register addressing (e.g. base + index)?
+    fn uses_complex_addresses(&self) -> bool {
+        false // Conservative default; an ISA that supports it overrides this.
+    }
+
    /// Get a data structure describing the registers in this ISA.
    fn register_info(&self) -> RegInfo;

--- a/lib/codegen/src/isa/x86/binemit.rs
+++ b/lib/codegen/src/isa/x86/binemit.rs
@@ -46,6 +46,18 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
    BASE_REX | b | (r << 2)
 }

+// Create a three-register REX prefix, setting:
+//
+// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
+// REX.R = bit 3 of reg register.
+// REX.X = bit 3 of SIB index register.
+fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
+    let b = ((rm >> 3) & 1) as u8;
+    let r = ((reg >> 3) & 1) as u8;
+    let x = ((index >> 3) & 1) as u8;
+    BASE_REX | b | (x << 1) | (r << 2) // B is bit 0, X is bit 1, R is bit 2.
+}
+
 // Emit a REX prefix.
 //
 // The R, X, and B bits are computed from registers using the functions above. The W bit is
@@ -211,7 +223,19 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS)
    sink.put1(b);
 }

-/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
+/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
+fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_rm(0b100, reg, sink);
+}
+
+/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
+/// displacement are present.
+fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_disp8(0b100, reg, sink);
+}
+
+/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
+/// displacement are present.
 fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
    modrm_disp32(0b100, reg, sink);
 }
@@ -225,6 +249,16 @@ fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
    sink.put1(b);
 }

+fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
+    // SIB byte layout: SS_III_BBB (2-bit scale, 3-bit index, 3-bit base).
+    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
+    let scale = scale & 3;
+    let index = index as u8 & 7; // Low 3 bits only; bit 3 goes in REX.X (see `rex3`).
+    let base = base as u8 & 7; // Low 3 bits only; bit 3 goes in REX.B (see `rex3`).
+    let b: u8 = (scale << 6) | (index << 3) | base;
+    sink.put1(b);
+}
+
 /// Get the low 4 bits of an opcode for an integer condition code.
 ///
 /// Add this offset to a base opcode for:
--- a/lib/codegen/src/isa/x86/mod.rs
+++ b/lib/codegen/src/isa/x86/mod.rs
@@ -62,6 +62,10 @@ impl TargetIsa for Isa {
        true
    }

+    fn uses_complex_addresses(&self) -> bool {
+        true // Enables the `*_complex` load/store folding in the post-opt pass.
+    }
+
    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }
--- a/lib/codegen/src/postopt.rs
+++ b/lib/codegen/src/postopt.rs
@@ -5,9 +5,9 @@
 use cursor::{Cursor, EncCursor};
 use ir::condcodes::{CondCode, FloatCC, IntCC};
 use ir::dfg::ValueDef;
-use ir::immediates::Imm64;
+use ir::immediates::{Imm64, Offset32};
 use ir::instructions::{Opcode, ValueList};
-use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
+use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value, Type, MemFlags};
 use isa::TargetIsa;
 use timing;

@@ -173,6 +173,158 @@ fn optimize_cpu_flags(
    pos.func.update_encoding(info.br_inst, isa).is_ok();
 }

+
+struct MemOpInfo { // One load/store being considered for folding into a `*_complex` form.
+    opcode: Opcode, // Opcode of the original load/store.
+    inst: Inst, // The load/store instruction itself.
+    itype: Type, // Controlling type variable of `inst`.
+    arg: Value, // Address operand: `arg` of a load, `args[1]` of a store.
+    st_arg: Option<Value>, // Value being stored (`args[0]`); `None` for loads.
+    flags: MemFlags, // Passed through unchanged to the replacement.
+    offset: Offset32, // Passed through unchanged to the replacement.
+    add_args: Option<[Value; 2]>, // Operands of the `iadd` defining `arg`, once found.
+}
+
+fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
+    let mut info = match pos.func.dfg[inst] { // Only the Load and Store formats qualify.
+        InstructionData::Load {
+            opcode,
+            arg,
+            flags,
+            offset,
+        } => MemOpInfo {
+            opcode: opcode,
+            inst: inst,
+            itype: pos.func.dfg.ctrl_typevar(inst),
+            arg: arg,
+            st_arg: None,
+            flags: flags,
+            offset: offset,
+            add_args: None,
+        },
+        InstructionData::Store {
+            opcode,
+            args,
+            flags,
+            offset,
+        } => MemOpInfo {
+            opcode: opcode,
+            inst: inst,
+            itype: pos.func.dfg.ctrl_typevar(inst),
+            arg: args[1], // Address operand.
+            st_arg: Some(args[0]), // Value to store.
+            flags: flags,
+            offset: offset,
+            add_args: None,
+        },
+        _ => return, // Any other instruction format is left alone.
+    };
+
+    if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
+        match pos.func.dfg[result_inst] {
+            InstructionData::Binary { opcode, args } if opcode == Opcode::Iadd => {
+                info.add_args = Some(args.clone()); // Address is `a + b`; keep both operands.
+            }
+            _ => return, // Address is computed by something other than `iadd`.
+        }
+    } else {
+        return; // Address is not an instruction result.
+    }
+
+    match info.opcode { // Replace `inst` in place with the matching `*_complex` opcode.
+        Opcode::Load => {
+            pos.func.dfg.replace(info.inst).load_complex(
+                info.itype,
+                info.flags,
+                &info.add_args.unwrap(), // `Some` is guaranteed by the `iadd` match above.
+                info.offset,
+            );
+        }
+        Opcode::Uload8 => {
+            pos.func.dfg.replace(info.inst).uload8_complex(
+                info.itype,
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Sload8 => {
+            pos.func.dfg.replace(info.inst).sload8_complex(
+                info.itype,
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Uload16 => {
+            pos.func.dfg.replace(info.inst).uload16_complex(
+                info.itype,
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Sload16 => {
+            pos.func.dfg.replace(info.inst).sload16_complex(
+                info.itype,
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Uload32 => {
+            pos.func.dfg.replace(info.inst).uload32_complex( // No type argument is passed here.
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Sload32 => {
+            pos.func.dfg.replace(info.inst).sload32_complex( // No type argument is passed here.
+                info.flags,
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Store => {
+            pos.func.dfg.replace(info.inst).store_complex(
+                info.flags,
+                info.st_arg.unwrap(), // `Some` for every Store-format instruction (set above).
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Istore8 => {
+            pos.func.dfg.replace(info.inst).istore8_complex(
+                info.flags,
+                info.st_arg.unwrap(),
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Istore16 => {
+            pos.func.dfg.replace(info.inst).istore16_complex(
+                info.flags,
+                info.st_arg.unwrap(),
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        Opcode::Istore32 => {
+            pos.func.dfg.replace(info.inst).istore32_complex(
+                info.flags,
+                info.st_arg.unwrap(),
+                &info.add_args.unwrap(),
+                info.offset,
+            );
+        }
+        _ => return, // Opcodes without a `*_complex` counterpart here stay as they are.
+    }
+    pos.func.update_encoding(info.inst, isa).is_ok(); // NOTE(review): failure is ignored.
+}
+
+
+
 //----------------------------------------------------------------------
 //
 // The main post-opt pass.
@@ -198,6 +350,10 @@ pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
                    }
                }
            }
+
+            if isa.uses_complex_addresses() {
+                optimize_complex_addresses(&mut pos, inst, isa);
+            }
        }
    }
 }
--- a/lib/codegen/src/predicates.rs
+++ b/lib/codegen/src/predicates.rs
@@ -46,6 +46,11 @@ pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool
    }
 }

+#[allow(dead_code)] // NOTE(review): presumably only called from generated predicates; confirm.
+pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
+    value_list.len(&func.dfg.value_lists) == num // True iff the list holds exactly `num` values.
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/lib/codegen/src/verifier/mod.rs
+++ b/lib/codegen/src/verifier/mod.rs
@@ -335,6 +335,12 @@ impl<'a> Verifier<'a> {
            RegFill { src, .. } => {
                self.verify_stack_slot(inst, src)?;
            }
+            LoadComplex { ref args, .. } => {
+                self.verify_value_list(inst, args)?;
+            }
+            StoreComplex { ref args, .. } => {
+                self.verify_value_list(inst, args)?;
+            }

            // Exhaustive list so we can't forget to add new formats
            Unary { .. } |
@@ -1149,8 +1155,8 @@ impl<'a> Verifier<'a> {
 mod tests {
    use super::{Error, Verifier};
    use entity::EntityList;
-    use ir::Function;
    use ir::instructions::{InstructionData, Opcode};
+    use ir::Function;
    use settings;

    macro_rules! assert_err_with_msg {
--- a/lib/codegen/src/write.rs
+++ b/lib/codegen/src/write.rs
@@ -369,12 +369,44 @@ pub fn write_operands(
        } => write!(w, " {}, {}{}", arg, stack_slot, offset),
        HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
        Load { flags, arg, offset, .. } => write!(w, "{} {}{}", flags, arg, offset),
+        LoadComplex {
+            flags,
+            ref args,
+            offset,
+            ..
+        } => {
+            let args = args.as_slice(pool);
+            write!(
+                w,
+                "{} {}{}",
+                flags,
+                DisplayValuesWithDelimiter(&args, '+'),
+                offset
+            )
+
+        }
        Store {
            flags,
            args,
            offset,
            ..
        } => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset),
+        StoreComplex {
+            flags,
+            ref args,
+            offset,
+            ..
+        } => {
+            let args = args.as_slice(pool);
+            write!(
+                w,
+                "{} {}, {}{}",
+                flags,
+                args[0],
+                DisplayValuesWithDelimiter(&args[1..], '+'),
+                offset
+            )
+        }
        RegMove { arg, src, dst, .. } => {
            if let Some(isa) = isa {
                let regs = isa.register_info();
@@ -450,6 +482,21 @@ impl<'a> fmt::Display for DisplayValues<'a> {
    }
 }

+struct DisplayValuesWithDelimiter<'a>(&'a [Value], char); // Values to print, and the separator.
+
+impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result {
+        for (i, val) in self.0.iter().enumerate() {
+            if i == 0 { // No separator before the first value.
+                write!(f, "{}", val)?;
+            } else {
+                write!(f, "{}{}", self.1, val)?;
+            }
+        }
+        Ok(()) // An empty slice writes nothing.
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use ir::types;