Generate a constant hash table for recognizing opcodes.

Use a simple quadratically probed, open addressed hash table. We could use a
perfect hash function, but it would take longer to compute in Python, and this
is not in the critical path performance-wise.
This commit is contained in:
Jakob Stoklund Olesen
2016-04-07 20:24:21 -07:00
parent 3dcd2f8e58
commit 24e0828d20
3 changed files with 142 additions and 2 deletions

76
meta/constant_hash.py Normal file
View File

@@ -0,0 +1,76 @@
"""
Generate constant hash tables.
The `constant_hash` module can generate constant pre-populated hash tables. We
don't attempt perfect hashing, but simply generate an open addressed
quadratically probed hash table.
"""
def simple_hash(s):
    """
    Compute a primitive 32-bit hash of a string.

    The state starts at 5381. For every character, its code point is XOR-ed
    into the state and the previous state rotated right by 6 bits is added;
    the result is truncated to 32 bits after each step.

    :param s: String to hash.
    :returns: The hash as an integer in the range 0..2**32-1.

    Example:

    >>> hex(simple_hash("Hello"))
    '0x2fa70c01'
    >>> hex(simple_hash("world"))
    '0x5b0c31d5'
    """
    mask = 0xffffffff
    h = 5381
    for c in s:
        # Explicit 32-bit rotate right by 6: the low 6 bits wrap around to
        # the top, everything else shifts down.
        rotated = ((h >> 6) | (h << 26)) & mask
        h = ((h ^ ord(c)) + rotated) & mask
    return h
def next_power_of_two(x):
    """
    Compute the smallest power of two that is strictly greater than `x`.

    Negative inputs are treated like 0, so the result is always at least 1.

    :param x: Integer to round up.
    :returns: A power of two greater than `x`.

    >>> next_power_of_two(0)
    1
    >>> next_power_of_two(1)
    2
    >>> next_power_of_two(2)
    4
    >>> next_power_of_two(3)
    4
    >>> next_power_of_two(4)
    8
    >>> next_power_of_two(-1)
    1
    """
    # For x >= 0, `x.bit_length()` is the position just above the highest set
    # bit, so shifting 1 by that amount yields the first power of two > x.
    return 1 << max(x, 0).bit_length()
def compute_quadratic(items, hash_function):
    """
    Build an open addressed, quadratically probed hash table holding `items`.

    The result is a list whose length is a power of two. Each element of
    `items` occupies exactly one slot; every other slot holds `None`.

    :param items: Iterable set of items to place in hash table.
    :param hash_function: Hash function which takes an item and returns a
                          number.
    :returns: The populated table as a list.

    Simple example (see hash values above, they collide on slot 1):

    >>> compute_quadratic(['Hello', 'world'], simple_hash)
    [None, 'Hello', 'world', None]
    """
    entries = list(items)

    # The table size must be a power of two. The item count is padded by 20%
    # and then rounded up to a power of two strictly greater than that, so
    # the table always keeps at least one unused slot.
    capacity = next_power_of_two(int(1.20 * len(entries)))
    slots = [None] * capacity

    for entry in entries:
        pos = hash_function(entry) % capacity
        # On a collision, advance by 1, then 2, then 3, ... slots (wrapping
        # around the table) until a free slot is found.
        stride = 1
        while slots[pos] is not None:
            pos = (pos + stride) % capacity
            stride += 1
        slots[pos] = entry

    return slots
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()

View File

@@ -3,6 +3,7 @@ Generate sources with instruction info.
"""
import srcgen
import constant_hash
def collect_instr_groups(targets):
seen = set()
@@ -24,6 +25,7 @@ def gen_opcodes(groups, out_dir):
fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]')
instrs = []
with fmt.indented('pub enum Opcode {', '}'):
fmt.line('NotAnOpcode,')
for g in groups:
for i in g.instructions:
instrs.append(i)
@@ -39,9 +41,20 @@ def gen_opcodes(groups, out_dir):
# Generate a private opcode_name function.
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
with fmt.indented('match opc {', '}'):
fmt.line('Opcode::NotAnOpcode => "<not an opcode>",')
for i in instrs:
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
# Generate an opcode hash table for looking up opcodes by name.
hash_table = constant_hash.compute_quadratic(instrs,
lambda i: constant_hash.simple_hash(i.name))
with fmt.indented('const OPCODE_HASH_TABLE: [Opcode; {}] = ['.format(len(hash_table)), '];'):
for i in hash_table:
if i is None:
fmt.line('Opcode::NotAnOpcode,')
else:
fmt.format('Opcode::{},', i.camel_name)
fmt.update_file('opcodes.rs', out_dir)
def generate(targets, out_dir):

View File

@@ -8,8 +8,12 @@
use std::fmt::{self, Display, Formatter};
use std::mem;
// The `Opcode` enum and the `opcode_name` function are generated from the meta instruction
// descriptions.
// Include code generated by `meta/gen_instr.py`. This file contains:
//
// - The `pub enum Opcode` definition with all known opcodes,
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
impl Display for Opcode {
@@ -18,6 +22,46 @@ impl Display for Opcode {
}
}
/// A primitive 32-bit string hash used for matching opcode names.
///
/// Starting from 5381, each character is XOR-ed into the running value and the previous value
/// rotated right by 6 bits is added with wrapping arithmetic.
///
/// Must match `meta/constant_hash.py`.
fn simple_hash(s: &str) -> u32 {
    s.chars().fold(5381, |acc: u32, ch| {
        (acc ^ ch as u32).wrapping_add(acc.rotate_right(6))
    })
}
impl Opcode {
    /// Parse an Opcode name from a string.
    ///
    /// Looks `s` up in `OPCODE_HASH_TABLE` using quadratic probing. Returns `Some(opcode)` when
    /// a table entry's name equals `s`, and `None` as soon as an unused
    /// (`Opcode::NotAnOpcode`) slot is reached.
    ///
    /// # Panics
    ///
    /// Panics if the table length is not a power of two, or if the probe sequence runs for
    /// `OPCODE_HASH_TABLE.len()` steps without finding a match or an unused slot.
    pub fn from_str(s: &str) -> Option<Opcode> {
        let table_len = OPCODE_HASH_TABLE.len();
        assert!(table_len.is_power_of_two());

        // With a power-of-two length, masking is the same as reducing modulo `table_len`.
        let mask = table_len - 1;
        let mut slot = simple_hash(s) as usize & mask;
        let mut probes: usize = 0;

        loop {
            match OPCODE_HASH_TABLE[slot] {
                // An unused slot ends the probe sequence: the name is not in the table.
                Opcode::NotAnOpcode => return None,
                candidate if opcode_name(candidate) == s => return Some(candidate),
                _ => {}
            }

            // Quadratic probing: advance by 1, 2, 3, ... slots.
            probes += 1;
            // When `table_len` is a power of two, it can be proven that the probe sequence
            // visits all entries. This means that this loop will always terminate if the hash
            // table has even one unused entry.
            assert!(probes < table_len);
            slot = (slot + probes) & mask;
        }
    }
}
/// 64-bit immediate integer operand.
///
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -178,6 +222,13 @@ mod tests {
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
assert_eq!(format!("{}", Opcode::IaddImm), "iadd_imm");
// Check the matcher.
assert_eq!(Opcode::from_str("iadd"), Some(Opcode::Iadd));
assert_eq!(Opcode::from_str("iadd_imm"), Some(Opcode::IaddImm));
assert_eq!(Opcode::from_str("iadd\0"), None);
assert_eq!(Opcode::from_str(""), None);
assert_eq!(Opcode::from_str("\0"), None);
}
#[test]