Generate a constant hash table for recognizing opcodes.
Use a simple quadratically probed, open addressed hash table. We could use a perfect hash function, but it would take longer to compute in Python, and this is not in the critical path performance-wise.
This commit is contained in:
76
meta/constant_hash.py
Normal file
76
meta/constant_hash.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
Generate constant hash tables.
|
||||||
|
|
||||||
|
The `constant_hash` module can generate constant pre-populated hash tables. We
|
||||||
|
don't attempt perfect hashing, but simply generate an open addressed
|
||||||
|
quadratically probed hash table.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def simple_hash(s):
    """
    Compute a primitive hash of a string.

    The hash state is kept to 32 bits. Each character is XOR-ed into the
    state and the state rotated right by 6 bits is added, modulo 2**32.

    :param s: String to hash.
    :returns: The hash value as an integer in the range 0..2**32-1.

    Example:
    >>> hex(simple_hash("Hello"))
    '0x2fa70c01'
    >>> hex(simple_hash("world"))
    '0x5b0c31d5'
    """
    h = 5381
    for c in s:
        # `(h >> 6) + (h << 26)` is a 32-bit rotate-right by 6 once the sum
        # is masked: the two terms occupy disjoint bit ranges below 2**32 and
        # everything above is discarded by the mask.
        h = ((h ^ ord(c)) + ((h >> 6) + (h << 26))) & 0xffffffff
    return h
|
||||||
|
|
||||||
|
def next_power_of_two(x):
    """
    Compute the next power of two that is greater than `x`:

    >>> next_power_of_two(0)
    1
    >>> next_power_of_two(1)
    2
    >>> next_power_of_two(2)
    4
    >>> next_power_of_two(3)
    4
    >>> next_power_of_two(4)
    8

    :param x: Non-negative integer.
    :returns: The smallest power of two strictly greater than `x`.
    """
    s = 1
    # `x & (x + 1) == 0` exactly when `x` has the form 2**k - 1 (all ones).
    # Smear the highest set bit downwards with doubling shifts until that
    # holds; adding one then yields the power of two above the original `x`.
    while x & (x + 1) != 0:
        x |= x >> s
        s *= 2
    return x + 1
|
||||||
|
|
||||||
|
def compute_quadratic(items, hash_function):
    """
    Compute an open addressed, quadratically probed hash table containing
    `items`. The returned table is a list containing the elements of the
    iterable `items` and `None` in unused slots.

    :param items: Iterable set of items to place in hash table.
    :param hash_function: Hash function which takes an item and returns a
                          number.
    :returns: A list whose length is a power of two.

    Simple example (see hash values above, they collide on slot 1):
    >>> compute_quadratic(['Hello', 'world'], simple_hash)
    [None, 'Hello', 'world', None]
    """

    items = list(items)
    # Table size must be a power of two. `next_power_of_two` returns a value
    # strictly greater than its argument, so the table always has more than
    # 1.2 times as many slots as items and at least one slot stays unused.
    size = next_power_of_two(int(1.20*len(items)))
    table = [None] * size

    for i in items:
        h = hash_function(i) % size
        s = 0
        # Quadratic probing: on each collision the step grows by one, so the
        # probed slots are the home slot plus 1, 3, 6, 10, ... (mod size).
        while table[h] is not None:
            s += 1
            h = (h + s) % size
        table[h] = i

    return table
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import doctest
|
||||||
|
doctest.testmod()
|
||||||
@@ -3,6 +3,7 @@ Generate sources with instruction info.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import srcgen
|
import srcgen
|
||||||
|
import constant_hash
|
||||||
|
|
||||||
def collect_instr_groups(targets):
|
def collect_instr_groups(targets):
|
||||||
seen = set()
|
seen = set()
|
||||||
@@ -24,6 +25,7 @@ def gen_opcodes(groups, out_dir):
|
|||||||
fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]')
|
fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]')
|
||||||
instrs = []
|
instrs = []
|
||||||
with fmt.indented('pub enum Opcode {', '}'):
|
with fmt.indented('pub enum Opcode {', '}'):
|
||||||
|
fmt.line('NotAnOpcode,')
|
||||||
for g in groups:
|
for g in groups:
|
||||||
for i in g.instructions:
|
for i in g.instructions:
|
||||||
instrs.append(i)
|
instrs.append(i)
|
||||||
@@ -39,9 +41,20 @@ def gen_opcodes(groups, out_dir):
|
|||||||
# Generate a private opcode_name function.
|
# Generate a private opcode_name function.
|
||||||
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
|
with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'):
|
||||||
with fmt.indented('match opc {', '}'):
|
with fmt.indented('match opc {', '}'):
|
||||||
|
fmt.line('Opcode::NotAnOpcode => "<not an opcode>",')
|
||||||
for i in instrs:
|
for i in instrs:
|
||||||
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
|
fmt.format('Opcode::{} => "{}",', i.camel_name, i.name)
|
||||||
|
|
||||||
|
# Generate an opcode hash table for looking up opcodes by name.
|
||||||
|
hash_table = constant_hash.compute_quadratic(instrs,
|
||||||
|
lambda i: constant_hash.simple_hash(i.name))
|
||||||
|
with fmt.indented('const OPCODE_HASH_TABLE: [Opcode; {}] = ['.format(len(hash_table)), '];'):
|
||||||
|
for i in hash_table:
|
||||||
|
if i is None:
|
||||||
|
fmt.line('Opcode::NotAnOpcode,')
|
||||||
|
else:
|
||||||
|
fmt.format('Opcode::{},', i.camel_name)
|
||||||
|
|
||||||
fmt.update_file('opcodes.rs', out_dir)
|
fmt.update_file('opcodes.rs', out_dir)
|
||||||
|
|
||||||
def generate(targets, out_dir):
|
def generate(targets, out_dir):
|
||||||
|
|||||||
@@ -8,8 +8,12 @@
|
|||||||
use std::fmt::{self, Display, Formatter};
|
use std::fmt::{self, Display, Formatter};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
// The `Opcode` enum and the `opcode_name` function are generated from the meta instruction
|
// Include code generated by `meta/gen_instr.py`. This file contains:
|
||||||
// descriptions.
|
//
|
||||||
|
// - The `pub enum Opcode` definition with all known opcodes,
|
||||||
|
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
|
||||||
|
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
|
||||||
|
//
|
||||||
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
|
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
|
||||||
|
|
||||||
impl Display for Opcode {
|
impl Display for Opcode {
|
||||||
@@ -18,6 +22,46 @@ impl Display for Opcode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A primitive hash function for matching opcodes.
///
/// Must match `meta/constant_hash.py`: starting from 5381, each character's
/// code point is XOR-ed into the 32-bit state, and the state rotated right by
/// 6 bits is added with wrap-around.
fn simple_hash(s: &str) -> u32 {
    s.chars()
        .fold(5381u32, |h, c| (h ^ c as u32).wrapping_add(h.rotate_right(6)))
}
|
||||||
|
|
||||||
|
impl Opcode {
|
||||||
|
/// Parse an Opcode name from a string.
|
||||||
|
pub fn from_str(s: &str) -> Option<Opcode> {
|
||||||
|
let tlen = OPCODE_HASH_TABLE.len();
|
||||||
|
assert!(tlen.is_power_of_two());
|
||||||
|
let mut idx = simple_hash(s) as usize;
|
||||||
|
let mut step: usize = 0;
|
||||||
|
loop {
|
||||||
|
idx = idx % tlen;
|
||||||
|
let entry = OPCODE_HASH_TABLE[idx];
|
||||||
|
|
||||||
|
if entry == Opcode::NotAnOpcode {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if *opcode_name(entry) == *s {
|
||||||
|
return Some(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quadratic probing.
|
||||||
|
step += 1;
|
||||||
|
// When `tlen` is a power of two, it can be proven that idx will visit all entries.
|
||||||
|
// This means that this loop will always terminate if the hash table has even one
|
||||||
|
// unused entry.
|
||||||
|
assert!(step < tlen);
|
||||||
|
idx += step;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// 64-bit immediate integer operand.
|
/// 64-bit immediate integer operand.
|
||||||
///
|
///
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||||
@@ -178,6 +222,13 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
|
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
|
||||||
assert_eq!(format!("{}", Opcode::IaddImm), "iadd_imm");
|
assert_eq!(format!("{}", Opcode::IaddImm), "iadd_imm");
|
||||||
|
|
||||||
|
// Check the matcher.
|
||||||
|
assert_eq!(Opcode::from_str("iadd"), Some(Opcode::Iadd));
|
||||||
|
assert_eq!(Opcode::from_str("iadd_imm"), Some(Opcode::IaddImm));
|
||||||
|
assert_eq!(Opcode::from_str("iadd\0"), None);
|
||||||
|
assert_eq!(Opcode::from_str(""), None);
|
||||||
|
assert_eq!(Opcode::from_str("\0"), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user