load_complex and store_complex instructions (#309)

* Start adding the load_complex and store_complex instructions.

N.b.:
The text format is not correct yet. Requires changes to the lexer and parser.
I'm not sure why I needed to change the RuntimeError to Exception yet. Will fix.

* Get first few encodings of load_complex working. Still needs var args type checking.

* Clean up ModRM helper functions in binemit.

* Implement 32-bit displace for load_complex

* Use encoding helpers instead of doing them all by hand

* Initial implementation of store_complex

* Parse value list for load/store_complex with + as delimiter. Looks nice.

* Add sign/zero-extension and size variants for load_complex.

* Add size variants of store_complex.

* Add asm helper lines to load/store complex bin tests.

* Example of length-checking the instruction ValueList for an encoding. Extremely questionable implementation.

* Fix Python linting issues

* First draft of postopt pass to fold adds and loads into load_complex. Just simple loads for now.

* Optimization pass now works with all types of loads.

* Add store+add -> store_complex to postopt pass

* Put complex address optimization behind ISA flag.

* Add load/store complex for f32 and f64

* Fixes changes to lexer that broke NaN parsing.

Abstracts away the repeated checks for whether or not the characters
following a + or - are going to be parsed as a number or not.

* Fix formatting issues

* Fix register restrictions for complex addresses.

* Encoding tests for x86-32.

* Add documentation for newly added instructions, recipes, and cdsl changes.

* Fix python formatting again

* Apply value-list length predicates to all LoadComplex and StoreComplex instructions.

* Add predicate types to new encoding helpers for mypy.

* Import FieldPredicate to satisfy mypy.

* Add and fix some "asm" strings in the encoding tests.

* Line-up 'bin' comments in x86/binary64 test

* Test parsing of offset-less store_complex instruction.

* 'sNaN' not 'sNan'

* Bounds check the lookup for polymorphic typevar operand.

* Fix encodings for istore16_complex.
This commit is contained in:
Tyler McMullen
2018-05-09 12:07:00 -07:00
committed by Dan Gohman
parent 5aa84a744b
commit f636d795c5
25 changed files with 1127 additions and 21 deletions

View File

@@ -57,7 +57,9 @@ CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS)
FuncAddr = InstructionFormat(func_ref)
Load = InstructionFormat(memflags, VALUE, offset32)
LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32)
Store = InstructionFormat(memflags, VALUE, VALUE, offset32)
StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32)
StackLoad = InstructionFormat(stack_slot, offset32)
StackStore = InstructionFormat(VALUE, stack_slot, offset32)

View File

@@ -246,6 +246,7 @@ x = Operand('x', Mem, doc='Value to be stored')
a = Operand('a', Mem, doc='Value loaded')
p = Operand('p', iAddr)
Flags = Operand('Flags', memflags)
args = Operand('args', VARIABLE_ARGS, doc='Address arguments')
load = Instruction(
'load', r"""
@@ -256,6 +257,15 @@ load = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
load_complex = Instruction(
'load_complex', r"""
Load from memory at ``sum(args) + Offset``.
This is a polymorphic instruction that can load any value type which
has a memory representation.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
store = Instruction(
'store', r"""
Store ``x`` to memory at ``p + Offset``.
@@ -265,6 +275,16 @@ store = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
store_complex = Instruction(
'store_complex', r"""
Store ``x`` to memory at ``sum(args) + Offset``.
This is a polymorphic instruction that can store any value type with a
memory representation.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt8 = TypeVar(
'iExt8', 'An integer type with more than 8 bits',
ints=(16, 64))
@@ -279,6 +299,14 @@ uload8 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload8_complex = Instruction(
'uload8_complex', r"""
Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i8`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload8 = Instruction(
'sload8', r"""
Load 8 bits from memory at ``p + Offset`` and sign-extend.
@@ -287,6 +315,14 @@ sload8 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 8-bit load whose address is the sum of a variable number of
# address arguments plus an immediate offset. The extension step is a sign
# extension, so the equivalent sequence is ``load.i8`` + ``sextend``.
sload8_complex = Instruction(
'sload8_complex', r"""
Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i8`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore8 = Instruction(
'istore8', r"""
Store the low 8 bits of ``x`` to memory at ``p + Offset``.
@@ -295,6 +331,14 @@ istore8 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore8_complex = Instruction(
'istore8_complex', r"""
Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt16 = TypeVar(
'iExt16', 'An integer type with more than 16 bits',
ints=(32, 64))
@@ -309,6 +353,14 @@ uload16 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload16_complex = Instruction(
'uload16_complex', r"""
Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i16`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload16 = Instruction(
'sload16', r"""
Load 16 bits from memory at ``p + Offset`` and sign-extend.
@@ -317,6 +369,14 @@ sload16 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 16-bit load whose address is the sum of a variable number of
# address arguments plus an immediate offset. The extension step is a sign
# extension, so the equivalent sequence is ``load.i16`` + ``sextend``.
sload16_complex = Instruction(
'sload16_complex', r"""
Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i16`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore16 = Instruction(
'istore16', r"""
Store the low 16 bits of ``x`` to memory at ``p + Offset``.
@@ -325,6 +385,14 @@ istore16 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore16_complex = Instruction(
'istore16_complex', r"""
Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt32 = TypeVar(
'iExt32', 'An integer type with more than 32 bits',
ints=(64, 64))
@@ -339,6 +407,14 @@ uload32 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload32_complex = Instruction(
'uload32_complex', r"""
Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i32`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload32 = Instruction(
'sload32', r"""
Load 32 bits from memory at ``p + Offset`` and sign-extend.
@@ -347,6 +423,14 @@ sload32 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 32-bit load whose address is the sum of a variable number of
# address arguments plus an immediate offset. The extension step is a sign
# extension, so the equivalent sequence is ``load.i32`` + ``sextend``.
sload32_complex = Instruction(
'sload32_complex', r"""
Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i32`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore32 = Instruction(
'istore32', r"""
Store the low 32 bits of ``x`` to memory at ``p + Offset``.
@@ -355,6 +439,14 @@ istore32 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore32_complex = Instruction(
'istore32_complex', r"""
Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
""",
ins=(Flags, x, args, Offset), can_store=True)
x = Operand('x', Mem, doc='Value to be stored')
a = Operand('a', Mem, doc='Value loaded')
Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot')

View File

@@ -2,12 +2,12 @@
Cretonne predicates that consider `Function` fields.
"""
from cdsl.predicates import FieldPredicate
from .formats import UnaryGlobalVar
from .formats import UnaryGlobalVar, InstructionFormat
try:
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from cdsl.formats import FormatField # noqa
from cdsl.formats import InstructionFormat, FormatField # noqa
except ImportError:
pass
@@ -33,3 +33,10 @@ class IsColocatedData(FieldPredicate):
# type: () -> None
super(IsColocatedData, self).__init__(
UnaryGlobalVar.global_var, 'is_colocated_data', ('func',))
class LengthEquals(FieldPredicate):
    """
    Instruction predicate over the whole value list of an instruction format.

    The predicate applies the `has_length_of` predicate function to the
    value-list field returned by `iform.args()`, passing `num` and `func` as
    the additional arguments.

    :param iform: Instruction format whose value list is inspected.
    :param num: Length handed to `has_length_of`.
    """

    def __init__(self, iform, num):
        # type: (InstructionFormat, int) -> None
        value_list = iform.args()
        extra_args = (num, 'func')
        super(LengthEquals, self).__init__(
            value_list, 'has_length_of', extra_args)

View File

@@ -103,6 +103,19 @@ class InstructionFormat(object):
InstructionFormat._registry[sig] = self
InstructionFormat.all_formats.append(self)
def args(self):
    # type: () -> FormatField
    """
    Get the field that stands for this format's complete ValueList.

    When the format keeps its values in a value list (`has_value_list`), a
    `ValueListField` (a `FormatField` subclass) bound to this format is
    returned; otherwise the result is `None`. The field is intended for
    building predicates on instructions that take variadic arguments.
    """
    if not self.has_value_list:
        return None
    return ValueListField(self)
def _process_member_names(self, kinds):
# type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField] # noqa
"""
@@ -210,7 +223,7 @@ class FormatField(object):
This corresponds to a single member of a variant of the `InstructionData`
data type.
:param iformat: Parent `InstructionFormat`.
:param iform: Parent `InstructionFormat`.
:param immnum: Immediate operand number in parent.
:param kind: Immediate Operand kind.
:param member: Member name in `InstructionData` variant.
@@ -227,6 +240,29 @@ class FormatField(object):
# type: () -> str
return '{}.{}'.format(self.format.name, self.member)
def rust_destructuring_name(self):
# type: () -> str
return self.member
def rust_name(self):
# type: () -> str
return self.member
class ValueListField(FormatField):
    """
    Field representing the entire value list of an instruction format.

    It stands for all Value-type members of an `InstructionData` variant
    that contains a ValueList, rather than for a single immediate member.

    :param iform: Parent `InstructionFormat`.
    """

    def __init__(self, iform):
        # type: (InstructionFormat) -> None
        # Only `format` and `member` are populated; the base-class
        # initializer is not invoked.
        self.member = "args"
        self.format = iform

    def rust_destructuring_name(self):
        # type: () -> str
        # The name used when destructuring is prefixed with `ref`.
        return 'ref {0}'.format(self.member)

View File

@@ -201,9 +201,10 @@ class Instruction(object):
# Prefer to use the typevar_operand to infer the controlling typevar.
self.use_typevar_operand = False
typevar_error = None
if self.format.typevar_operand is not None:
tv_op = self.format.typevar_operand
if tv_op is not None and tv_op < len(self.value_opnums):
try:
opnum = self.value_opnums[self.format.typevar_operand]
opnum = self.value_opnums[tv_op]
tv = self.ins[opnum].typevar
if tv is tv.free_typevar() or tv.singleton_type() is not None:
self.other_typevars = self._verify_ctrl_typevar(tv)

View File

@@ -27,7 +27,7 @@ def gen_recipe(recipe, fmt):
nvops = iform.num_value_operands
want_args = any(isinstance(i, RegClass) or isinstance(i, Stack)
for i in recipe.ins)
assert not want_args or nvops > 0
assert not want_args or nvops > 0 or iform.has_value_list
want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack)
for o in recipe.outs)

View File

@@ -103,7 +103,7 @@ def emit_instp(instp, fmt, has_func=False):
fnames = set() # type: Set[str]
for p in leafs:
if isinstance(p, FieldPredicate):
fnames.add(p.field.rust_name())
fnames.add(p.field.rust_destructuring_name())
else:
assert isinstance(p, TypePredicate)
has_type_check = True

View File

@@ -3,9 +3,9 @@ x86 Encodings.
"""
from __future__ import absolute_import
from cdsl.predicates import IsUnsignedInt, Not, And
from base.predicates import IsColocatedFunc, IsColocatedData
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
from base import instructions as base
from base.formats import UnaryImm, FuncAddr, Call
from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
from .defs import X86_64, X86_32
from . import recipes as r
from . import settings as cfg
@@ -19,6 +19,7 @@ try:
from typing import TYPE_CHECKING, Any # noqa
if TYPE_CHECKING:
from cdsl.instructions import MaybeBoundInst # noqa
from cdsl.predicates import FieldPredicate # noqa
except ImportError:
pass
@@ -54,6 +55,15 @@ def enc_x86_64(inst, recipe, *args, **kwargs):
X86_64.enc(inst, *recipe(*args, **kwargs))
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
    """
    Register `inst` on X86_64 both with and without a REX prefix, attaching
    the instruction predicate `instp` to each encoding.
    """
    # The REX-prefixed recipe is registered first, then the plain one.
    for build in (recipe.rex, recipe):
        X86_64.enc(inst, *build(*args, **kwargs), instp=instp)
def enc_both(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
"""
@@ -63,6 +73,15 @@ def enc_both(inst, recipe, *args, **kwargs):
enc_x86_64(inst, recipe, *args, **kwargs)
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
    """
    Register `inst` on X86_32 and on X86_64, attaching the instruction
    predicate `instp` to every encoding.
    """
    # 32-bit ISA: plain (REX-less) recipe only.
    plain = recipe(*args, **kwargs)
    X86_32.enc(inst, *plain, instp=instp)
    # 64-bit ISA: delegated to the X86_64 helper.
    enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
def enc_i32_i64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
@@ -80,6 +99,25 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
    """
    Predicate-carrying variant of `enc_i32_i64`.

    Registers, each with `instp` attached:

    - `inst.i32` on X86_32,
    - `inst.i32` on X86_64 with and without REX,
    - `inst.i64` on X86_64 with a REX.W prefix.
    """
    X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)

    # Order matters: the REX encoding is listed ahead of the REX-less one so
    # the REX-less form is not picked by default. Otherwise reg-alloc would
    # never use r8 and up.
    for build in (recipe.rex, recipe):
        X86_64.enc(inst.i32, *build(*args, **kwargs), instp=instp)

    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
"""
@@ -212,6 +250,31 @@ X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
#
# Encodings for the *_complex loads.
#
# Every encoding below carries `ldcomplexp`, a `has_length_of` predicate on
# the LoadComplex value list with length 2. This matches the two register
# inputs declared by the ldWithIndex* recipes.
ldcomplexp = LengthEquals(LoadComplex, 2)
# One set of encodings per displacement size: none, 8-bit, 32-bit.
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
# sload32_complex is only registered on X86_64, with a REX.W prefix.
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
instp=ldcomplexp)
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
# Encodings for the *_complex stores.
#
# Every encoding below carries `stcomplexp`, a `has_length_of` predicate on
# the StoreComplex value list with length 3. This matches the three register
# inputs declared by the stWithIndex* recipes (stored value plus two address
# registers).
stcomplexp = LengthEquals(StoreComplex, 3)
# One set of encodings per displacement size: none, 8-bit, 32-bit.
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
# 16-bit stores use the same opcode with a 0x66 prefix byte.
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
# Byte stores go through the *_abcd recipes, whose stored-value operand is
# restricted to the ABCD register class.
for recipe in [r.stWithIndex_abcd,
r.stWithIndexDisp8_abcd,
r.stWithIndexDisp32_abcd]:
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
for recipe in [r.st, r.stDisp8, r.stDisp32]:
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
@@ -286,18 +349,34 @@ enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)

View File

@@ -14,6 +14,7 @@ from base.formats import IntSelect, IntCondTrap, FloatCondTrap
from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
from base.formats import RegMove, RegSpill, RegFill, CopySpecial
from base.formats import LoadComplex, StoreComplex
from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE
from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG
from .registers import StackGPR32, StackFPR32
@@ -739,6 +740,22 @@ st = TailRecipe(
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset.
#
# Operand roles, as used by the emit code:
#   in_reg0 - value to store; placed in the ModR/M reg field by modrm_sib.
#   in_reg1 - SIB base register.
#   in_reg2 - SIB index register (scale field 0).
# The recipe only applies when the instruction's offset is 0, so no
# displacement bytes are emitted. A HeapOutOfBounds trap record is added
# unless the memory flags are `notrap`.
stWithIndex = TailRecipe(
'stWithIndex', StoreComplex, size=2,
ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store with no offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
st_abcd = TailRecipe(
@@ -754,6 +771,23 @@ st_abcd = TailRecipe(
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stWithIndex_abcd = TailRecipe(
'stWithIndex_abcd', StoreComplex, size=2,
ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store of FPR with no offset.
fst = TailRecipe(
'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
@@ -766,6 +800,20 @@ fst = TailRecipe(
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset of FPR.
fstWithIndex = TailRecipe(
'fstWithIndex', StoreComplex, size=2,
ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store with 8-bit offset.
stDisp8 = TailRecipe(
@@ -781,6 +829,27 @@ stDisp8 = TailRecipe(
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset.
stWithIndexDisp8 = TailRecipe(
'stWithIndexDisp8', StoreComplex, size=3,
ins=(GPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 8-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stDisp8_abcd = TailRecipe(
'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(),
instp=IsSignedInt(Store.offset, 8),
@@ -795,6 +864,27 @@ stDisp8_abcd = TailRecipe(
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stWithIndexDisp8_abcd = TailRecipe(
'stWithIndexDisp8_abcd', StoreComplex, size=3,
ins=(ABCD, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 8-bit offset of FPR.
fstDisp8 = TailRecipe(
'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(),
instp=IsSignedInt(Store.offset, 8),
@@ -809,6 +899,24 @@ fstDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset of FPR.
fstWithIndexDisp8 = TailRecipe(
'fstWithIndexDisp8', StoreComplex, size=3,
ins=(FPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 32-bit offset.
stDisp32 = TailRecipe(
'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
@@ -822,6 +930,27 @@ stDisp32 = TailRecipe(
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset.
stWithIndexDisp32 = TailRecipe(
'stWithIndexDisp32', StoreComplex, size=6,
ins=(GPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with 32-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stDisp32_abcd = TailRecipe(
'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(),
when_prefixed=stDisp32,
@@ -835,6 +964,27 @@ stDisp32_abcd = TailRecipe(
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
#
# Operand roles, as used by the emit code:
#   in_reg0 - value to store (ABCD); placed in the ModR/M reg field.
#   in_reg1 - SIB base register.
#   in_reg2 - SIB index register (scale field 0).
# The offset is written as a 4-byte displacement after the SIB byte.
#
# NOTE(review): stDisp32_abcd passes `when_prefixed=stDisp32`, but this recipe
# has no `when_prefixed=` argument. Presumably the REX-prefixed encodings
# therefore keep the ABCD restriction on the stored value -- confirm that
# this is intended.
stWithIndexDisp32_abcd = TailRecipe(
'stWithIndexDisp32_abcd', StoreComplex, size=6,
ins=(ABCD, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with 32-bit offset of FPR.
fstDisp32 = TailRecipe(
'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
clobbers_flags=False,
@@ -848,6 +998,24 @@ fstDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset of FPR.
fstWithIndexDisp32 = TailRecipe(
'fstWithIndexDisp32', StoreComplex, size=6,
ins=(FPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# Unary spill with SIB and 32-bit displacement.
spillSib32 = TailRecipe(
'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
@@ -919,6 +1087,22 @@ ld = TailRecipe(
modrm_rm(in_reg0, out_reg0, sink);
''')
# XX /r load with index and no offset.
#
# Operand roles, as used by the emit code:
#   in_reg0  - SIB base register.
#   in_reg1  - SIB index register (scale field 0).
#   out_reg0 - destination; placed in the ModR/M reg field by modrm_sib.
# The recipe only applies when the instruction's offset is 0, so no
# displacement bytes are emitted. A HeapOutOfBounds trap record is added
# unless the memory flags are `notrap`.
ldWithIndex = TailRecipe(
'ldWithIndex', LoadComplex, size=2,
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsEqual(LoadComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
''')
# XX /r float load with no offset.
fld = TailRecipe(
'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR),
@@ -932,6 +1116,22 @@ fld = TailRecipe(
modrm_rm(in_reg0, out_reg0, sink);
''')
# XX /r float load with index and no offset.
fldWithIndex = TailRecipe(
'fldWithIndex', LoadComplex, size=2,
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsEqual(LoadComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
''')
# XX /r load with 8-bit offset.
ldDisp8 = TailRecipe(
'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -947,6 +1147,24 @@ ldDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r load with index and 8-bit offset.
ldWithIndexDisp8 = TailRecipe(
'ldWithIndexDisp8', LoadComplex, size=3,
ins=(GPR, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsSignedInt(LoadComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp8(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r float load with 8-bit offset.
fldDisp8 = TailRecipe(
'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -962,6 +1180,24 @@ fldDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r float load with index and 8-bit offset.
fldWithIndexDisp8 = TailRecipe(
'fldWithIndexDisp8', LoadComplex, size=3,
ins=(GPR, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsSignedInt(LoadComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp8(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r load with 32-bit offset.
ldDisp32 = TailRecipe(
'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -977,6 +1213,24 @@ ldDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r load with index and 32-bit offset.
ldWithIndexDisp32 = TailRecipe(
'ldWithIndexDisp32', LoadComplex, size=6,
ins=(GPR, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsSignedInt(LoadComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp32(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r float load with 32-bit offset.
fldDisp32 = TailRecipe(
'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -992,6 +1246,24 @@ fldDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r float load with index and 32-bit offset.
fldWithIndexDisp32 = TailRecipe(
'fldWithIndexDisp32', LoadComplex, size=6,
ins=(GPR, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsSignedInt(LoadComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp32(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# Unary fill with SIB and 32-bit displacement.
fillSib32 = TailRecipe(
'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,

View File

@@ -162,6 +162,11 @@ pub trait TargetIsa: fmt::Display {
false
}
/// Does the CPU implement multi-register addressing?
///
/// The default implementation returns `false`; an ISA that supports such
/// addressing overrides this method to return `true`.
fn uses_complex_addresses(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;

View File

@@ -46,6 +46,18 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
BASE_REX | b | (r << 2)
}
// Build the REX prefix byte for an instruction that names three registers:
//
// REX.B <- bit 3 of the r/m register (the SIB base register when a SIB byte is present).
// REX.R <- bit 3 of the reg register.
// REX.X <- bit 3 of the SIB index register.
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
    // Extract bit 3 of a register unit as a 0/1 byte.
    let bit3 = |unit: RegUnit| ((unit >> 3) & 1) as u8;
    BASE_REX | bit3(rm) | (bit3(index) << 1) | (bit3(reg) << 2)
}
// Emit a REX prefix.
//
// The R, X, and B bits are computed from registers using the functions above. The W bit is
@@ -211,7 +223,19 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS)
sink.put1(b);
}
/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
///
/// Only the ModR/M byte is written here, by forwarding to `modrm_rm` with
/// `0b100` as the r/m argument. The recipes emit the SIB byte afterwards
/// with `sib`.
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b100, reg, sink);
}
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
/// displacement are present.
///
/// This forwards to `modrm_disp8` with `0b100` as the r/m argument. The
/// recipes emit the SIB byte and the displacement byte afterwards.
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp8(0b100, reg, sink);
}
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
/// displacement are present.
///
/// This forwards to `modrm_disp32` with `0b100` as the r/m argument. The
/// recipes emit the SIB byte and the 4-byte displacement afterwards.
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp32(0b100, reg, sink);
}
@@ -225,6 +249,16 @@ fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
sink.put1(b);
}
/// Emit a SIB byte built from a scale field, an index register and a base
/// register.
///
/// The byte is laid out as `SS_III_BBB`: `scale` occupies the top two bits,
/// the low three bits of `index` the middle, and the low three bits of `base`
/// the bottom. In debug builds a `scale` that does not fit in two bits
/// triggers an assertion; otherwise it is masked to two bits.
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
    let ss = (scale & 0b11) << 6;
    let iii = (index as u8 & 0b111) << 3;
    let bbb = base as u8 & 0b111;
    sink.put1(ss | iii | bbb);
}
/// Get the low 4 bits of an opcode for an integer condition code.
///
/// Add this offset to a base opcode for:

View File

@@ -62,6 +62,10 @@ impl TargetIsa for Isa {
true
}
fn uses_complex_addresses(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}

View File

@@ -5,9 +5,9 @@
use cursor::{Cursor, EncCursor};
use ir::condcodes::{CondCode, FloatCC, IntCC};
use ir::dfg::ValueDef;
use ir::immediates::Imm64;
use ir::immediates::{Imm64, Offset32};
use ir::instructions::{Opcode, ValueList};
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value, Type, MemFlags};
use isa::TargetIsa;
use timing;
@@ -173,6 +173,158 @@ fn optimize_cpu_flags(
pos.func.update_encoding(info.br_inst, isa).is_ok();
}
/// Everything `optimize_complex_addresses` needs to know about a simple load
/// or store in order to rewrite it as its `*_complex` counterpart.
struct MemOpInfo {
    /// Opcode of the original load or store.
    opcode: Opcode,
    /// The load or store instruction itself.
    inst: Inst,
    /// Controlling type variable of `inst`.
    itype: Type,
    /// Address operand of the load or store.
    arg: Value,
    /// Value being stored; `None` for loads.
    st_arg: Option<Value>,
    /// Memory flags copied from the original instruction.
    flags: MemFlags,
    /// Constant offset copied from the original instruction.
    offset: Offset32,
    /// Operands of the `iadd` that defines `arg`, once one has been found.
    add_args: Option<[Value; 2]>,
}
/// Try to fold an `iadd` that computes a memory address into the load or
/// store that consumes it.
///
/// `inst` is only touched when it is a `Load`- or `Store`-format instruction
/// whose address operand is the result of an `iadd`. In that case it is
/// replaced in place by the matching `*_complex` instruction, which takes
/// both `iadd` operands as its address, and its encoding is then recomputed
/// for `isa`. Every other instruction is left as it was.
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
    // Collect the pieces of a simple load or store; anything else is ignored.
    let mut info = match pos.func.dfg[inst] {
        InstructionData::Load {
            opcode,
            arg,
            flags,
            offset,
        } => MemOpInfo {
            opcode,
            inst,
            itype: pos.func.dfg.ctrl_typevar(inst),
            arg,
            st_arg: None,
            flags,
            offset,
            add_args: None,
        },
        InstructionData::Store {
            opcode,
            args,
            flags,
            offset,
        } => MemOpInfo {
            opcode,
            inst,
            itype: pos.func.dfg.ctrl_typevar(inst),
            // For stores the address is the second operand, the stored value the first.
            arg: args[1],
            st_arg: Some(args[0]),
            flags,
            offset,
            add_args: None,
        },
        _ => return,
    };

    // The address must be produced directly by an `iadd`; otherwise there is
    // nothing to fold.
    info.add_args = match pos.func.dfg.value_def(info.arg) {
        ValueDef::Result(def_inst, _) => match pos.func.dfg[def_inst] {
            InstructionData::Binary {
                opcode: Opcode::Iadd,
                args,
            } => Some(args),
            _ => return,
        },
        _ => return,
    };

    // Always `Some` at this point: it was assigned just above.
    let addr_parts = info.add_args.unwrap();

    // Swap the instruction for its complex-address variant.
    match info.opcode {
        Opcode::Load => {
            pos.func.dfg.replace(info.inst).load_complex(
                info.itype,
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Uload8 => {
            pos.func.dfg.replace(info.inst).uload8_complex(
                info.itype,
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Sload8 => {
            pos.func.dfg.replace(info.inst).sload8_complex(
                info.itype,
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Uload16 => {
            pos.func.dfg.replace(info.inst).uload16_complex(
                info.itype,
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Sload16 => {
            pos.func.dfg.replace(info.inst).sload16_complex(
                info.itype,
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Uload32 => {
            pos.func.dfg.replace(info.inst).uload32_complex(
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Sload32 => {
            pos.func.dfg.replace(info.inst).sload32_complex(
                info.flags,
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Store => {
            pos.func.dfg.replace(info.inst).store_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Istore8 => {
            pos.func.dfg.replace(info.inst).istore8_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Istore16 => {
            pos.func.dfg.replace(info.inst).istore16_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_parts,
                info.offset,
            );
        }
        Opcode::Istore32 => {
            pos.func.dfg.replace(info.inst).istore32_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_parts,
                info.offset,
            );
        }
        _ => return,
    }

    // Recompute the encoding for the rewritten instruction; the result is discarded.
    pos.func.update_encoding(info.inst, isa).is_ok();
}
//----------------------------------------------------------------------
//
// The main post-opt pass.
@@ -198,6 +350,10 @@ pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}

View File

@@ -46,6 +46,11 @@ pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool
}
}
/// Check whether `value_list` holds exactly `num` values.
///
/// The length is looked up in the value-list pool owned by the data flow
/// graph of `func`.
// NOTE(review): presumably only referenced from generated encoding
// predicates; confirm before removing the `allow`.
#[allow(dead_code)]
pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
    let pool = &func.dfg.value_lists;
    let actual_len = value_list.len(pool);
    num == actual_len
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -335,6 +335,12 @@ impl<'a> Verifier<'a> {
RegFill { src, .. } => {
self.verify_stack_slot(inst, src)?;
}
LoadComplex { ref args, .. } => {
self.verify_value_list(inst, args)?;
}
StoreComplex { ref args, .. } => {
self.verify_value_list(inst, args)?;
}
// Exhaustive list so we can't forget to add new formats
Unary { .. } |
@@ -1149,8 +1155,8 @@ impl<'a> Verifier<'a> {
mod tests {
use super::{Error, Verifier};
use entity::EntityList;
use ir::Function;
use ir::instructions::{InstructionData, Opcode};
use ir::Function;
use settings;
macro_rules! assert_err_with_msg {

View File

@@ -369,12 +369,44 @@ pub fn write_operands(
} => write!(w, " {}, {}{}", arg, stack_slot, offset),
HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
Load { flags, arg, offset, .. } => write!(w, "{} {}{}", flags, arg, offset),
LoadComplex {
flags,
ref args,
offset,
..
} => {
let args = args.as_slice(pool);
write!(
w,
"{} {}{}",
flags,
DisplayValuesWithDelimiter(&args, '+'),
offset
)
}
Store {
flags,
args,
offset,
..
} => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset),
StoreComplex {
flags,
ref args,
offset,
..
} => {
let args = args.as_slice(pool);
write!(
w,
"{} {}, {}{}",
flags,
args[0],
DisplayValuesWithDelimiter(&args[1..], '+'),
offset
)
}
RegMove { arg, src, dst, .. } => {
if let Some(isa) = isa {
let regs = isa.register_info();
@@ -450,6 +482,21 @@ impl<'a> fmt::Display for DisplayValues<'a> {
}
}
/// Displays a slice of values separated by a single delimiter character.
///
/// The delimiter is written only between values, never before the first or
/// after the last one; an empty slice produces no output.
struct DisplayValuesWithDelimiter<'a>(&'a [Value], char);

impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result {
        let delimiter = self.1;
        let mut remaining = self.0.iter();

        // The first value is written bare; each later one is prefixed by the delimiter.
        if let Some(first) = remaining.next() {
            write!(f, "{}", first)?;
            for val in remaining {
                write!(f, "{}{}", delimiter, val)?;
            }
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use ir::types;