Generate a constant hash table for recognizing opcodes.
Use a simple quadratically probed, open addressed hash table. We could use a perfect hash function, but it would take longer to compute in Python, and this is not in the critical path performance-wise.
This commit is contained in:
@@ -8,8 +8,12 @@
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::mem;
|
||||
|
||||
// The `Opcode` enum and the `opcode_name` function are generated from the meta instruction
|
||||
// descriptions.
|
||||
// Include code generated by `meta/gen_instr.py`. This file contains:
|
||||
//
|
||||
// - The `pub enum Opcode` definition with all known opcodes,
|
||||
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
|
||||
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
|
||||
//
|
||||
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
|
||||
|
||||
impl Display for Opcode {
|
||||
@@ -18,6 +22,46 @@ impl Display for Opcode {
|
||||
}
|
||||
}
|
||||
|
||||
// A primitive string hash used to match opcode names.
//
// Starting from the seed 5381, every character is mixed in by XOR-ing its code point into the
// state and then adding the previous state rotated right by six bits (wrapping on overflow).
//
// Must match `meta/constant_hash.py`, which uses the same function to build the table.
fn simple_hash(s: &str) -> u32 {
    s.chars().fold(5381u32, |state, ch| {
        let mixed = state ^ (ch as u32);
        mixed.wrapping_add(state.rotate_right(6))
    })
}
|
||||
|
||||
impl Opcode {
|
||||
/// Parse an Opcode name from a string.
|
||||
pub fn from_str(s: &str) -> Option<Opcode> {
|
||||
let tlen = OPCODE_HASH_TABLE.len();
|
||||
assert!(tlen.is_power_of_two());
|
||||
let mut idx = simple_hash(s) as usize;
|
||||
let mut step: usize = 0;
|
||||
loop {
|
||||
idx = idx % tlen;
|
||||
let entry = OPCODE_HASH_TABLE[idx];
|
||||
|
||||
if entry == Opcode::NotAnOpcode {
|
||||
return None;
|
||||
}
|
||||
|
||||
if *opcode_name(entry) == *s {
|
||||
return Some(entry);
|
||||
}
|
||||
|
||||
// Quadratic probing.
|
||||
step += 1;
|
||||
// When `tlen` is a power of two, it can be proven that idx will visit all entries.
|
||||
// This means that this loop will always terminate if the hash table has even one
|
||||
// unused entry.
|
||||
assert!(step < tlen);
|
||||
idx += step;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 64-bit immediate integer operand.
|
||||
///
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||
@@ -178,6 +222,13 @@ mod tests {
|
||||
|
||||
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
|
||||
assert_eq!(format!("{}", Opcode::IaddImm), "iadd_imm");
|
||||
|
||||
// Check the matcher.
|
||||
assert_eq!(Opcode::from_str("iadd"), Some(Opcode::Iadd));
|
||||
assert_eq!(Opcode::from_str("iadd_imm"), Some(Opcode::IaddImm));
|
||||
assert_eq!(Opcode::from_str("iadd\0"), None);
|
||||
assert_eq!(Opcode::from_str(""), None);
|
||||
assert_eq!(Opcode::from_str("\0"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
76
meta/constant_hash.py
Normal file
76
meta/constant_hash.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Generate constant hash tables.
|
||||
|
||||
The `constant_hash` module can generate constant pre-populated hash tables. We
|
||||
don't attempt perfect hashing, but simply generate an open addressed
|
||||
quadratically probed hash table.
|
||||
"""
|
||||
|
||||
def simple_hash(s):
    """
    Compute a primitive 32-bit hash of the string `s`.

    The state starts at 5381. For each character, its code point is XOR-ed
    into the state and the previous state rotated right by 6 bits is added,
    with the result truncated to 32 bits.

    Example:
    >>> hex(simple_hash("Hello"))
    '0x2fa70c01'
    >>> hex(simple_hash("world"))
    '0x5b0c31d5'
    """
    mask = 0xffffffff
    state = 5381
    for ch in s:
        # 32-bit rotate right by 6: the two shifted halves never overlap.
        rotated = ((state >> 6) | (state << 26)) & mask
        state = ((state ^ ord(ch)) + rotated) & mask
    return state
|
||||
|
||||
def next_power_of_two(x):
    """
    Return the smallest power of two that is strictly greater than `x`:

    >>> next_power_of_two(0)
    1
    >>> next_power_of_two(1)
    2
    >>> next_power_of_two(2)
    4
    >>> next_power_of_two(3)
    4
    >>> next_power_of_two(4)
    8
    """
    shift = 1
    while True:
        # Once every bit below the top set bit is also set, `x + 1` is the
        # answer (it shares no bits with `x`).
        if x & (x + 1) == 0:
            return x + 1
        # Smear the set bits downwards, doubling the distance each round.
        x |= x >> shift
        shift *= 2
|
||||
|
||||
def compute_quadratic(items, hash_function):
    """
    Build an open addressed, quadratically probed hash table holding `items`.

    The result is a list whose length is a power of two. Each element of the
    iterable `items` occupies one slot, and unused slots contain `None`.

    :param items: Iterable set of items to place in hash table.
    :param hash_function: Hash function which takes an item and returns a
                          number.

    Simple example (both strings hash to slot 1 under `simple_hash`):
    >>> compute_quadratic(['Hello', 'world'], simple_hash)
    [None, 'Hello', 'world', None]
    """
    entries = list(items)
    # Table size must be a power of two. Aim for >20% unused slots.
    capacity = next_power_of_two(int(1.20 * len(entries)))
    slots = [None] * capacity

    for entry in entries:
        pos = hash_function(entry) % capacity
        step = 0
        while True:
            if slots[pos] is None:
                slots[pos] = entry
                break
            # Slot taken: move on by a distance that grows by one per miss.
            step += 1
            pos = (pos + step) % capacity

    return slots
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
@@ -3,6 +3,7 @@ Generate sources with instruction info.
|
||||
"""
|
||||
|
||||
import srcgen
|
||||
import constant_hash
|
||||
|
||||
def collect_instr_groups(targets):
|
||||
seen = set()
|
||||
@@ -24,6 +25,7 @@ def gen_opcodes(groups, out_dir):
|
||||
fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]')
|
||||
instrs = []
|
||||
with fmt.indented('pub enum Opcode {', '}'):
|
||||
fmt.line('NotAnOpcode,')
|
||||
for g in groups:
|
||||
for i in g.instructions:
|
||||
instrs.append(i)
|
||||
@@ -39,9 +41,20 @@ def gen_opcodes(groups, out_dir):
|
||||
# Generate a private opcode_name function.
|
||||
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
|
||||
with fmt.indented('match opc {', '}'):
|
||||
fmt.line('Opcode::NotAnOpcode => "<not an opcode>",')
|
||||
for i in instrs:
|
||||
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
|
||||
|
||||
# Generate an opcode hash table for looking up opcodes by name.
|
||||
hash_table = constant_hash.compute_quadratic(instrs,
|
||||
lambda i: constant_hash.simple_hash(i.name))
|
||||
with fmt.indented('const OPCODE_HASH_TABLE: [Opcode; {}] = ['.format(len(hash_table)), '];'):
|
||||
for i in hash_table:
|
||||
if i is None:
|
||||
fmt.line('Opcode::NotAnOpcode,')
|
||||
else:
|
||||
fmt.format('Opcode::{},', i.camel_name)
|
||||
|
||||
fmt.update_file('opcodes.rs', out_dir)
|
||||
|
||||
def generate(targets, out_dir):
|
||||
|
||||
Reference in New Issue
Block a user