Illegalize rbp/r13 for zero-offset loads on Intel x64 (#225)

* Switch RegClass to a bitmap implementation.

* Add special RegClass to remove r13 from 'ld' recipe.

* Use MASK_LEN constant instead of magic number.

* Enforce that RegClass slicing is only valid on contiguous classes.

* Use Optional[int] for RegClass optional bitmask parameter.

* Add comment explaining use of Intel ISA's GPR_NORIP register class.
This commit is contained in:
Tyler McMullen
2018-01-16 21:05:53 -07:00
committed by Jakob Stoklund Olesen
parent 1e2b7de141
commit eb85aa833c
4 changed files with 77 additions and 36 deletions

View File

@@ -26,12 +26,12 @@ from . import is_power_of_two, next_power_of_two
try: try:
from typing import Sequence, Tuple, List, Dict, Any, TYPE_CHECKING # noqa from typing import Sequence, Tuple, List, Dict, Any, Optional, TYPE_CHECKING # noqa
if TYPE_CHECKING: if TYPE_CHECKING:
from .isa import TargetISA # noqa from .isa import TargetISA # noqa
# A tuple uniquely identifying a register class inside a register bank. # A tuple uniquely identifying a register class inside a register bank.
# (count, width, start) # (width, bitmask)
RCTup = Tuple[int, int, int] RCTup = Tuple[int, int]
except ImportError: except ImportError:
pass pass
@@ -195,24 +195,29 @@ class RegClass(object):
:param start: The first unit to allocate, relative to `bank.first.unit`. :param start: The first unit to allocate, relative to `bank.first.unit`.
""" """
def __init__(self, bank, count=None, width=1, start=0): def __init__(self, bank, count=0, width=1, start=0, bitmask=None):
# type: (RegBank, int, int, int) -> None # type: (RegBank, int, int, int, Optional[int]) -> None
self.name = None # type: str self.name = None # type: str
self.index = None # type: int self.index = None # type: int
self.bank = bank self.bank = bank
self.start = start
self.width = width self.width = width
self.bitmask = 0
# This is computed later in `finish_regclasses()`. # This is computed later in `finish_regclasses()`.
self.subclasses = list() # type: List[RegClass] self.subclasses = list() # type: List[RegClass]
self.toprc = None # type: RegClass self.toprc = None # type: RegClass
assert width > 0 assert width > 0
assert start >= 0 and start < bank.units
if count is None: if bitmask:
self.bitmask = bitmask
else:
assert start >= 0 and start < bank.units
if count == 0:
count = bank.units // width count = bank.units // width
self.count = count for a in range(count):
u = start + a * self.width
self.bitmask |= 1 << u
bank.classes.append(self) bank.classes.append(self)
@@ -238,7 +243,7 @@ class RegClass(object):
The tuple can be used as a dictionary key to ensure that there are no The tuple can be used as a dictionary key to ensure that there are no
duplicate register classes. duplicate register classes.
""" """
return (self.count, self.width, self.start) return (self.width, self.bitmask)
def intersect(self, other): def intersect(self, other):
# type: (RegClass) -> RCTup # type: (RegClass) -> RCTup
@@ -249,17 +254,11 @@ class RegClass(object):
""" """
if self.width != other.width: if self.width != other.width:
return None return None
s_end = self.start + self.count * self.width intersection = self.bitmask & other.bitmask
o_end = other.start + other.count * other.width if intersection == 0:
if self.start >= o_end or other.start >= s_end:
return None return None
# We have an overlap. return (self.width, intersection)
start = max(self.start, other.start)
end = min(s_end, o_end)
count = (end - start) // self.width
assert count > 0
return (count, self.width, start)
def __getitem__(self, sliced): def __getitem__(self, sliced):
# type: (slice) -> RegClass # type: (slice) -> RegClass
@@ -271,14 +270,57 @@ class RegClass(object):
assert isinstance(sliced, slice), "RegClass slicing can't be 1 reg" assert isinstance(sliced, slice), "RegClass slicing can't be 1 reg"
# We could add strided sub-classes if needed. # We could add strided sub-classes if needed.
assert sliced.step is None, 'Subclass striding not supported' assert sliced.step is None, 'Subclass striding not supported'
# Can't slice a non-contiguous class
assert self.is_contiguous(), 'Cannot slice non-contiguous RegClass'
w = self.width w = self.width
s = self.start + sliced.start * w s = self.start() + sliced.start * w
c = sliced.stop - sliced.start c = sliced.stop - sliced.start
assert c > 1, "Can't have single-register classes" assert c > 1, "Can't have single-register classes"
return RegClass(self.bank, count=c, width=w, start=s) return RegClass(self.bank, count=c, width=w, start=s)
def without(self, *registers):
    # type: (*Register) -> RegClass
    """
    Create a sub-class of this register class excluding a specific set of
    registers.

    For example: GPR.without(GPR.r9)

    :param registers: The registers to remove from this class.
    :returns: A new `RegClass` in the same bank and with the same width as
              this one, whose bitmask has the given registers cleared.
    """
    w = self.width
    # Mask of `w` consecutive low bits, shifted into place per register.
    fmask = (1 << w) - 1
    bm = self.bitmask
    for reg in registers:
        # NOTE(review): the shift scales `reg.unit` by the width. This is
        # a no-op for width 1; confirm the intended bit position for wider
        # classes against how `Register.unit` is defined.
        bm &= ~(fmask << (reg.unit * w))
    # An all-zero bitmask is falsy, so the constructor would treat it as
    # "no bitmask given" and build a class from its count/start defaults
    # instead of an empty one.
    assert bm != 0, "Can't remove every register from a RegClass"
    # Pass the width along; otherwise the sub-class silently gets width 1.
    return RegClass(self.bank, width=w, bitmask=bm)
def is_contiguous(self):
    # type: () -> bool
    """
    Check whether the set bits of this class's bitmask form one unbroken
    run.

    An empty bitmask is not considered contiguous.
    """
    bits = self.bitmask
    if bits == 0:
        return False
    # Fill in every bit below the lowest set bit. If the set bits were one
    # run, the result is all ones from bit 0 up, so adding 1 carries past
    # the top and shares no bit with it.
    filled = bits | (bits - 1)
    return (filled & (filled + 1)) == 0
def start(self):
    # type: () -> int
    """
    Returns the first valid register unit in this class.

    This is the index of the lowest set bit in the class bitmask, counted
    in register units.

    The class must not be empty; an empty bitmask has no first unit.
    """
    bm = self.bitmask
    # Scanning an empty bitmask for a set bit would never terminate.
    assert bm != 0, 'Empty RegClass has no first unit'
    # `bm & -bm` isolates the lowest set bit; its bit length minus one is
    # that bit's index. Counting in `width`-sized steps instead would give
    # a register index, not a unit, for classes wider than one unit.
    return (bm & -bm).bit_length() - 1
def __getattr__(self, attr): def __getattr__(self, attr):
# type: (str) -> Register # type: (str) -> Register
""" """
@@ -299,19 +341,13 @@ class RegClass(object):
Return as a list of 32-bit integers. Return as a list of 32-bit integers.
""" """
mask = [0] * MASK_LEN out_mask = []
mask32 = (1 << 32) - 1
bitmask = self.bitmask << self.bank.first_unit
for i in range(MASK_LEN):
out_mask.append((bitmask >> (i * 32)) & mask32)
start = self.bank.first_unit + self.start return out_mask
for a in range(self.count):
u = start + a * self.width
b = u % 32
# We need fancier masking code if a register can straddle mask
# words. This will only happen with widths that are not powers of
# two.
assert b + self.width <= 32, 'Register straddles words'
mask[u // 32] |= 1 << b
return mask
def subclass_mask(self): def subclass_mask(self):
# type: () -> int # type: () -> int

View File

@@ -59,7 +59,7 @@ def gen_regclass(rc, fmt):
fmt.format('width: {},', rc.width) fmt.format('width: {},', rc.width)
fmt.format('bank: {},', rc.bank.index) fmt.format('bank: {},', rc.bank.index)
fmt.format('toprc: {},', rc.toprc.index) fmt.format('toprc: {},', rc.toprc.index)
fmt.format('first: {},', rc.bank.first_unit + rc.start) fmt.format('first: {},', rc.bank.first_unit + rc.start())
fmt.format('subclasses: 0x{:x},', rc.subclass_mask()) fmt.format('subclasses: 0x{:x},', rc.subclass_mask())
mask = ', '.join('0x{:08x}'.format(x) for x in rc.mask()) mask = ', '.join('0x{:08x}'.format(x) for x in rc.mask())
fmt.format('mask: [{}],', mask) fmt.format('mask: [{}],', mask)

View File

@@ -11,7 +11,8 @@ from base.formats import IntCompare, FloatCompare, IntCond, FloatCond
from base.formats import Jump, Branch, BranchInt, BranchFloat from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr, UnaryGlobalVar from base.formats import Ternary, FuncAddr, UnaryGlobalVar
from base.formats import RegMove, RegSpill, RegFill, CopySpecial from base.formats import RegMove, RegSpill, RegFill, CopySpecial
from .registers import GPR, ABCD, FPR, GPR8, FPR8, FLAG, StackGPR32, StackFPR32 from .registers import GPR, ABCD, FPR, GPR_NORIP, GPR8, FPR8, GPR8_NORIP
from .registers import FLAG, StackGPR32, StackFPR32
from .defs import supported_floatccs from .defs import supported_floatccs
from .settings import use_sse41 from .settings import use_sse41
@@ -103,6 +104,7 @@ def replace_put_op(emit, prefix):
# Register class mapping for no-REX instructions. # Register class mapping for no-REX instructions.
NOREX_MAP = { NOREX_MAP = {
GPR: GPR8, GPR: GPR8,
GPR_NORIP: GPR8_NORIP,
FPR: FPR8 FPR: FPR8
} }
@@ -766,7 +768,7 @@ frsp32 = TailRecipe(
# XX /r load with no offset. # XX /r load with no offset.
ld = TailRecipe( ld = TailRecipe(
'ld', Load, size=1, ins=(GPR), outs=(GPR), 'ld', Load, size=1, ins=(GPR_NORIP), outs=(GPR),
instp=IsEqual(Load.offset, 0), instp=IsEqual(Load.offset, 0),
clobbers_flags=False, clobbers_flags=False,
emit=''' emit='''

View File

@@ -47,6 +47,9 @@ FlagRegs = RegBank(
GPR = RegClass(IntRegs) GPR = RegClass(IntRegs)
GPR8 = GPR[0:8] GPR8 = GPR[0:8]
# In certain instructions, RBP and R13 are interpreted as RIP-relative.
GPR_NORIP = GPR.without(GPR.rbp, GPR.r13)
GPR8_NORIP = GPR8.without(GPR.rbp)
ABCD = GPR[0:4] ABCD = GPR[0:4]
FPR = RegClass(FloatRegs) FPR = RegClass(FloatRegs)
FPR8 = FPR[0:8] FPR8 = FPR[0:8]