Generate a constant hash table for recognizing opcodes.

Use a simple quadratically probed, open addressed hash table. We could use a
perfect hash function, but it would take longer to compute in Python, and this
is not in the critical path performance-wise.
This commit is contained in:
Jakob Stoklund Olesen
2016-04-07 20:24:21 -07:00
parent 3dcd2f8e58
commit 24e0828d20
3 changed files with 142 additions and 2 deletions

76
meta/constant_hash.py Normal file
View File

@@ -0,0 +1,76 @@
"""
Generate constant hash tables.
The `constant_hash` module can generate constant pre-populated hash tables. We
don't attempt perfect hashing, but simply generate an open addressed
quadratically probed hash table.
"""
def simple_hash(s):
    """
    Compute a primitive 32-bit hash of the string `s`.

    The running value starts at 5381. Each character's code point is XORed
    into it, two shifted copies of the previous value are added, and the
    result is truncated to 32 bits.

    Example:

    >>> hex(simple_hash("Hello"))
    '0x2fa70c01'
    >>> hex(simple_hash("world"))
    '0x5b0c31d5'
    """
    mask = 0xffffffff
    acc = 5381
    for code in map(ord, s):
        mixed = acc ^ code
        shifted = (acc >> 6) + (acc << 26)
        # Truncate after every step so the value never exceeds 32 bits.
        acc = (mixed + shifted) & mask
    return acc
def next_power_of_two(x):
    """
    Return the smallest power of two that is strictly greater than `x`:

    >>> next_power_of_two(0)
    1
    >>> next_power_of_two(1)
    2
    >>> next_power_of_two(2)
    4
    >>> next_power_of_two(3)
    4
    >>> next_power_of_two(4)
    8
    """
    shift = 1
    while True:
        # Once every bit below the highest set bit is also set, `x` has the
        # form 2**k - 1 and shares no bits with `x + 1`.
        if x & (x + 1) == 0:
            return x + 1
        # Smear the set bits downwards, doubling the distance each round.
        x = x | (x >> shift)
        shift += shift
def compute_quadratic(items, hash_function):
    """
    Build an open addressed, quadratically probed hash table holding `items`.

    The result is a list whose length is a power of two. Each element of the
    iterable `items` occupies one slot, and unused slots contain `None`.

    :param items: Iterable set of items to place in hash table.
    :param hash_function: Hash function which takes an item and returns a
                          number.

    Simple example (see hash values above, they collide on slot 1):

    >>> compute_quadratic(['Hello', 'world'], simple_hash)
    [None, 'Hello', 'world', None]
    """
    entries = list(items)
    # Table size must be a power of two. Aim for >20% unused slots.
    capacity = next_power_of_two(int(1.20 * len(entries)))
    slots = [None] * capacity
    for entry in entries:
        index = hash_function(entry) % capacity
        # On a collision advance by 1, then 2, then 3, ... slots, wrapping
        # around the end of the table.
        step = 1
        while slots[index] is not None:
            index = (index + step) % capacity
            step += 1
        slots[index] = entry
    return slots
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod
    testmod()

View File

@@ -3,6 +3,7 @@ Generate sources with instruction info.
"""
import srcgen
import constant_hash
def collect_instr_groups(targets):
seen = set()
@@ -24,6 +25,7 @@ def gen_opcodes(groups, out_dir):
fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]')
instrs = []
with fmt.indented('pub enum Opcode {', '}'):
fmt.line('NotAnOpcode,')
for g in groups:
for i in g.instructions:
instrs.append(i)
@@ -39,9 +41,20 @@ def gen_opcodes(groups, out_dir):
# Generate a private opcode_name function.
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
with fmt.indented('match opc {', '}'):
fmt.line('Opcode::NotAnOpcode => "<not an opcode>",')
for i in instrs:
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
# Generate an opcode hash table for looking up opcodes by name.
hash_table = constant_hash.compute_quadratic(instrs,
lambda i: constant_hash.simple_hash(i.name))
with fmt.indented('const OPCODE_HASH_TABLE: [Opcode; {}] = ['.format(len(hash_table)), '];'):
for i in hash_table:
if i is None:
fmt.line('Opcode::NotAnOpcode,')
else:
fmt.format('Opcode::{},', i.camel_name)
fmt.update_file('opcodes.rs', out_dir)
def generate(targets, out_dir):