From 1b7d5d849f4a11c024db18905a677c86e08dc167 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 7 Apr 2016 20:24:21 -0700 Subject: [PATCH] Generate a constant hash table for recognizing opcodes. Use a simple quadratically probed, open addressed hash table. We could use a perfect hash function, but it would take longer to compute in Python, and this is not in the critical path performance-wise. --- cranelift/src/libcretonne/immediates.rs | 55 +++++++++++++++++- meta/constant_hash.py | 76 +++++++++++++++++++++++++ meta/gen_instr.py | 13 +++++ 3 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 meta/constant_hash.py diff --git a/cranelift/src/libcretonne/immediates.rs b/cranelift/src/libcretonne/immediates.rs index 4b9adcc43a..911197c349 100644 --- a/cranelift/src/libcretonne/immediates.rs +++ b/cranelift/src/libcretonne/immediates.rs @@ -8,8 +8,12 @@ use std::fmt::{self, Display, Formatter}; use std::mem; -// The `Opcode` enum and the `opcode_name` function are generated from the meta instruction -// descriptions. +// Include code generated by `meta/gen_instr.py`. This file contains: +// +// - The `pub enum Opcode` definition with all known opcodes, +// - The private `fn opcode_name(Opcode) -> &'static str` function, and +// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`. +// include!(concat!(env!("OUT_DIR"), "/opcodes.rs")); impl Display for Opcode { @@ -18,6 +22,46 @@ impl Display for Opcode { } } +// A primitive hash function for matching opcodes. +// Must match `meta/constant_hash.py`. +fn simple_hash(s: &str) -> u32 { + let mut h: u32 = 5381; + for c in s.chars() { + h = (h ^ c as u32).wrapping_add(h.rotate_right(6)); + } + h +} + +impl Opcode { + /// Parse an Opcode name from a string. 
+ pub fn from_str(s: &str) -> Option<Opcode> { + let tlen = OPCODE_HASH_TABLE.len(); + assert!(tlen.is_power_of_two()); + let mut idx = simple_hash(s) as usize; + let mut step: usize = 0; + loop { + idx = idx % tlen; + let entry = OPCODE_HASH_TABLE[idx]; + + if entry == Opcode::NotAnOpcode { + return None; + } + + if *opcode_name(entry) == *s { + return Some(entry); + } + + // Quadratic probing. + step += 1; + // When `tlen` is a power of two, it can be proven that idx will visit all entries. + // This means that this loop will always terminate if the hash table has even one + // unused entry. + assert!(step < tlen); + idx += step; + } + } +} + /// 64-bit immediate integer operand. /// #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -178,6 +222,13 @@ mod tests { assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm"); assert_eq!(format!("{}", Opcode::IaddImm), "iadd_imm"); + + // Check the matcher. + assert_eq!(Opcode::from_str("iadd"), Some(Opcode::Iadd)); + assert_eq!(Opcode::from_str("iadd_imm"), Some(Opcode::IaddImm)); + assert_eq!(Opcode::from_str("iadd\0"), None); + assert_eq!(Opcode::from_str(""), None); + assert_eq!(Opcode::from_str("\0"), None); } #[test] diff --git a/meta/constant_hash.py b/meta/constant_hash.py new file mode 100644 index 0000000000..b1b6104231 --- /dev/null +++ b/meta/constant_hash.py @@ -0,0 +1,76 @@ +""" +Generate constant hash tables. + +The `constant_hash` module can generate constant pre-populated hash tables. We +don't attempt perfect hashing, but simply generate an open addressed +quadratically probed hash table. +""" + +def simple_hash(s): + """ + Compute a primitive hash of a string. 
+ + Example: + >>> hex(simple_hash("Hello")) + '0x2fa70c01' + >>> hex(simple_hash("world")) + '0x5b0c31d5' + """ + h = 5381 + for c in s: + h = ((h ^ ord(c)) + ((h >> 6) + (h << 26))) & 0xffffffff + return h + +def next_power_of_two(x): + """ + Compute the next power of two that is greater than `x`: + >>> next_power_of_two(0) + 1 + >>> next_power_of_two(1) + 2 + >>> next_power_of_two(2) + 4 + >>> next_power_of_two(3) + 4 + >>> next_power_of_two(4) + 8 + """ + s = 1 + while x & (x + 1) != 0: + x |= x >> s + s *= 2 + return x + 1 + +def compute_quadratic(items, hash_function): + """ + Compute an open addressed, quadratically probed hash table containing + `items`. The returned table is a list containing the elements of the + iterable `items` and `None` in unused slots. + + :param items: Iterable set of items to place in hash table. + :param hash_function: Hash function which takes an item and returns a + number. + + Simple example (see hash values above, they collide on slot 1): + >>> compute_quadratic(['Hello', 'world'], simple_hash) + [None, 'Hello', 'world', None] + """ + + items = list(items) + # Table size must be a power of two. Aim for >20% unused slots. + size = next_power_of_two(int(1.20*len(items))) + table = [None] * size + + for i in items: + h = hash_function(i) % size + s = 0 + while table[h] is not None: + s += 1 + h = (h + s) % size + table[h] = i + + return table + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/meta/gen_instr.py b/meta/gen_instr.py index b0fe4686f4..eb1b0a361f 100644 --- a/meta/gen_instr.py +++ b/meta/gen_instr.py @@ -3,6 +3,7 @@ Generate sources with instruction info. 
""" import srcgen +import constant_hash def collect_instr_groups(targets): seen = set() @@ -24,6 +25,7 @@ def gen_opcodes(groups, out_dir): fmt.line('#[derive(Copy, Clone, PartialEq, Eq, Debug)]') instrs = [] with fmt.indented('pub enum Opcode {', '}'): + fmt.line('NotAnOpcode,') for g in groups: for i in g.instructions: instrs.append(i) @@ -39,9 +41,20 @@ def gen_opcodes(groups, out_dir): # Generate a private opcode_name function. with fmt.indented('fn opcode_name(opc: Opcode) -> &\'static str {', '}'): with fmt.indented('match opc {', '}'): + fmt.line('Opcode::NotAnOpcode => "",') for i in instrs: fmt.format('Opcode::{} => "{}",', i.camel_name, i.name) + # Generate an opcode hash table for looking up opcodes by name. + hash_table = constant_hash.compute_quadratic(instrs, + lambda i: constant_hash.simple_hash(i.name)) + with fmt.indented('const OPCODE_HASH_TABLE: [Opcode; {}] = ['.format(len(hash_table)), '];'): + for i in hash_table: + if i is None: + fmt.line('Opcode::NotAnOpcode,') + else: + fmt.format('Opcode::{},', i.camel_name) + fmt.update_file('opcodes.rs', out_dir) def generate(targets, out_dir):