diff --git a/meta/gen_encoding.py b/meta/gen_encoding.py index 648a1289eb..8fe8e1e304 100644 --- a/meta/gen_encoding.py +++ b/meta/gen_encoding.py @@ -54,6 +54,7 @@ import srcgen from constant_hash import compute_quadratic from unique_table import UniqueSeqTable from collections import OrderedDict, defaultdict +import math def emit_instp(instp, fmt): @@ -79,7 +80,7 @@ def emit_instp(instp, fmt): with fmt.indented('{} => {{'.format(instp.number), '}'): with fmt.indented( - 'if let {} {{ {}, .. }} = *inst {{' + 'if let InstructionData::{} {{ {}, .. }} = *inst {{' .format(iform.name, fields), '}'): fmt.line('return {};'.format(instp.rust_predicate(0))) @@ -90,15 +91,15 @@ def emit_instps(instps, fmt): """ with fmt.indented( - 'fn check_instp(inst: &InstructionData, instp_idx: u16) -> bool {', - '}'): + 'pub fn check_instp(inst: &InstructionData, instp_idx: u16) ' + + '-> bool {', '}'): with fmt.indented('match instp_idx {', '}'): for instp in instps: emit_instp(instp, fmt) fmt.line('_ => panic!("Invalid instruction predicate")') # The match cases will fall through if the instruction format is wrong. - fmt.line('panic!("Bad format {}/{} for instp {}",') + fmt.line('panic!("Bad format {:?}/{} for instp {}",') fmt.line(' InstructionFormat::from(inst),') fmt.line(' inst.opcode(),') fmt.line(' instp_idx);') @@ -279,7 +280,7 @@ def encode_enclists(level1, seq_table, doc_table): def emit_enclists(seq_table, doc_table, fmt): with fmt.indented( - 'const ENCLISTS: [u16; {}] = ['.format(len(seq_table.table)), + 'pub static ENCLISTS: [u16; {}] = ['.format(len(seq_table.table)), '];'): line = '' for idx, entry in enumerate(seq_table.table): @@ -299,13 +300,13 @@ def encode_level2_hashtables(level1, level2_hashtables, level2_doc): level2.layout_hashtable(level2_hashtables, level2_doc) -def emit_level2_hashtables(level2_hashtables, level2_doc, fmt): +def emit_level2_hashtables(level2_hashtables, offt, level2_doc, fmt): """ Emit the big concatenation of level 2 hash tables. 
""" with fmt.indented( - 'const LEVEL2: [(Opcode, u32); {}] = [' - .format(len(level2_hashtables)), + 'pub static LEVEL2: [Level2Entry<{}>; {}] = [' + .format(offt, len(level2_hashtables)), '];'): for offset, entry in enumerate(level2_hashtables): if offset in level2_doc: @@ -313,10 +314,54 @@ def emit_level2_hashtables(level2_hashtables, level2_doc, fmt): fmt.comment(doc) if entry: fmt.line( - '(Opcode::{}, {:#08x}),' + 'Level2Entry ' + + '{{ opcode: Opcode::{}, offset: {:#08x} }},' .format(entry.inst.camel_name, entry.offset)) else: - fmt.line('(Opcode::NotAnOpcode, 0),') + fmt.line( + 'Level2Entry ' + + '{ opcode: Opcode::NotAnOpcode, offset: 0 },') + + +def emit_level1_hashtable(cpumode, level1, offt, fmt): + """ + Emit a level 1 hash table for `cpumode`. + """ + hash_table = compute_quadratic( + level1.tables.values(), + lambda level2: level2.ty.number) + + with fmt.indented( + 'pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [' + .format(cpumode.name.upper(), offt, len(hash_table)), '];'): + for level2 in hash_table: + if level2: + l2l = int(math.log(level2.hash_table_len, 2)) + assert l2l > 0, "Hash table too small" + fmt.line( + 'Level1Entry ' + + '{{ ty: types::{}, log2len: {}, offset: {:#08x} }},' + .format( + level2.ty.name.upper(), + l2l, + level2.hash_table_offset)) + else: + # Empty entry. + fmt.line( + 'Level1Entry ' + + '{ ty: types::VOID, log2len: 0, offset: 0 },') + + +def offset_type(length): + """ + Compute an appropriate Rust integer type to use for offsets into a table of + the given length. + """ + if length <= 0x10000: + return 'u16' + else: + assert length <= 0x100000000, "Table too big" + return 'u32' def gen_isa(isa, fmt): @@ -324,6 +369,9 @@ def gen_isa(isa, fmt): # check_instp() function.. emit_instps(isa.all_instps, fmt) + # Level1 tables, one per CPU mode + level1_tables = dict() + # Tables for enclists with comments. 
seq_table = UniqueSeqTable() doc_table = defaultdict(list) @@ -335,11 +383,20 @@ def gen_isa(isa, fmt): for cpumode in isa.cpumodes: level2_doc[len(level2_hashtables)].append(cpumode.name) level1 = make_tables(cpumode) + level1_tables[cpumode] = level1 encode_enclists(level1, seq_table, doc_table) encode_level2_hashtables(level1, level2_hashtables, level2_doc) + # Level 1 table encodes offsets into the level 2 table. + level1_offt = offset_type(len(level2_hashtables)) + # Level 2 tables encode offsets into seq_table. + level2_offt = offset_type(len(seq_table.table)) + emit_enclists(seq_table, doc_table, fmt) - emit_level2_hashtables(level2_hashtables, level2_doc, fmt) + emit_level2_hashtables(level2_hashtables, level2_offt, level2_doc, fmt) + for cpumode in isa.cpumodes: + emit_level1_hashtable( + cpumode, level1_tables[cpumode], level1_offt, fmt) def generate(isas, out_dir): diff --git a/src/libcretonne/isa/encoding.rs b/src/libcretonne/isa/encoding.rs new file mode 100644 index 0000000000..62aa54e445 --- /dev/null +++ b/src/libcretonne/isa/encoding.rs @@ -0,0 +1,152 @@ +//! Support types for generated encoding tables. +//! +//! This module contains types and functions for working with the encoding tables generated by +//! `meta/gen_encoding.py`. +use ir::{Type, Opcode}; +use isa::Encoding; +use constant_hash::{Table, probe}; + +/// Level 1 hash table entry. +/// +/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type +/// variable, using `VOID` for non-polymorphic instructions. +/// +/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2` +/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables +/// have a power-of-two size. +/// +/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the +/// size of the `LEVEL2` table.
A `u16` offset allows entries to shrink to 32 bits each, but some +/// ISAs may have tables so large that `u32` offsets are needed. +/// +/// Empty entries are encoded with a 0 `log2len`. This is on the assumption that no level 2 tables +/// have only a single entry. +pub struct Level1Entry + Copy> { + pub ty: Type, + pub log2len: u8, + pub offset: OffT, +} + +impl + Copy> Table for [Level1Entry] { + fn len(&self) -> usize { + self.len() + } + + fn key(&self, idx: usize) -> Option { + if self[idx].log2len != 0 { + Some(self[idx].ty) + } else { + None + } + } +} + +/// Level 2 hash table entry. +/// +/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS` +/// table where the encoding recipes for the instruction are stored. +/// +/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16` +/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8` +/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16 +/// bits. +/// +/// Empty entries are encoded with a `NotAnOpcode` `opcode` field. +pub struct Level2Entry + Copy> { + pub opcode: Opcode, + pub offset: OffT, +} + +impl + Copy> Table for [Level2Entry] { + fn len(&self) -> usize { + self.len() + } + + fn key(&self, idx: usize) -> Option { + let opc = self[idx].opcode; + if opc != Opcode::NotAnOpcode { + Some(opc) + } else { + None + } + } +} + +/// Two-level hash table lookup. +/// +/// Given the controlling type variable and instruction opcode, find the corresponding encoding +/// list. +/// +/// Returns an offset into the ISA's `ENCLISTS` table, or `None` if the opcode/type combination is +/// not legal.
+pub fn lookup_enclist(ctrl_typevar: Type, + opcode: Opcode, + level1_table: &[Level1Entry], + level2_table: &[Level2Entry]) + -> Option + where OffT1: Into + Copy, + OffT2: Into + Copy +{ + probe(level1_table, ctrl_typevar, ctrl_typevar.index()).and_then(|l1idx| { + let l1ent = &level1_table[l1idx]; + let l2off = l1ent.offset.into() as usize; + let l2tab = &level2_table[l2off..l2off + (1 << l1ent.log2len)]; + probe(l2tab, opcode, opcode as usize).map(|l2idx| l2tab[l2idx].offset.into() as usize) + }) +} + +/// Encoding list entry. +/// +/// Encoding lists are represented as sequences of u16 words. +pub type EncListEntry = u16; + +/// Number of bits used to represent a predicate. c.f. `meta.gen_encoding.py`. +const PRED_BITS: u8 = 12; +const PRED_MASK: EncListEntry = (1 << PRED_BITS) - 1; + +/// The match-always instruction predicate. c.f. `meta.gen_encoding.py`. +const CODE_ALWAYS: EncListEntry = PRED_MASK; + +/// The encoding list terminator. +const CODE_FAIL: EncListEntry = 0xffff; + +/// Find the most general encoding of `inst`. +/// +/// Given an encoding list offset as returned by `lookup_enclist` above, search the encoding list +/// for the most general encoding that applies to `inst`. The encoding lists are laid out such that +/// this is the last valid entry in the list. +/// +/// This function takes two closures that are used to evaluate predicates: +/// - `instp` is passed an instruction predicate number to be evaluated on the current instruction. +/// - `isap` is passed an ISA predicate number to evaluate. +/// +/// Returns the corresponding encoding, or `None` if no list entries are satisfied by `inst`. 
+pub fn general_encoding(offset: usize, + enclist: &[EncListEntry], + instp: InstP, + isap: IsaP) + -> Option + where InstP: Fn(EncListEntry) -> bool, + IsaP: Fn(EncListEntry) -> bool +{ + let mut found = None; + let mut pos = offset; + while enclist[pos] != CODE_FAIL { + let pred = enclist[pos]; + if pred <= CODE_ALWAYS { + // This is an instruction predicate followed by recipe and encbits entries. + if pred == CODE_ALWAYS || instp(pred) { + found = Some(Encoding::new(enclist[pos + 1], enclist[pos + 2])) + } + pos += 3; + } else { + // This is an ISA predicate entry. + pos += 1; + if !isap(pred & PRED_MASK) { + // ISA predicate failed, skip the next N entries. + pos += 3 * (pred >> PRED_BITS) as usize; + } + } + } + found +} diff --git a/src/libcretonne/isa/mod.rs b/src/libcretonne/isa/mod.rs index fb2a4dadff..60028415fd 100644 --- a/src/libcretonne/isa/mod.rs +++ b/src/libcretonne/isa/mod.rs @@ -41,9 +41,10 @@ //! concurrent function compilations. pub mod riscv; +mod encoding; use settings; -use ir::{Inst, DataFlowGraph}; +use ir::{InstructionData, DataFlowGraph}; /// Look for a supported ISA with the given `name`. /// Return a builder that can create a corresponding `TargetIsa`. @@ -91,7 +92,7 @@ pub trait TargetIsa { /// Otherwise, return `None`. /// /// This is also the main entry point for determining if an instruction is legal. - fn encode(&self, dfg: &DataFlowGraph, inst: &Inst) -> Option; + fn encode(&self, dfg: &DataFlowGraph, inst: &InstructionData) -> Option; } /// Bits needed to encode an instruction as binary machine code. diff --git a/src/libcretonne/isa/riscv/encoding.rs b/src/libcretonne/isa/riscv/encoding.rs new file mode 100644 index 0000000000..760f4786d7 --- /dev/null +++ b/src/libcretonne/isa/riscv/encoding.rs @@ -0,0 +1,14 @@ +//! Encoding tables for RISC-V. 
+ +use ir::{Opcode, InstructionData}; +use ir::instructions::InstructionFormat; +use ir::types; +use predicates; +use isa::encoding::{Level1Entry, Level2Entry}; + +// Include the generated encoding tables: +// - `LEVEL1_RV32` +// - `LEVEL1_RV64` +// - `LEVEL2` +// - `ENCLISTS` +include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs")); diff --git a/src/libcretonne/isa/riscv/mod.rs b/src/libcretonne/isa/riscv/mod.rs index b9ffa4925c..0042b5e758 100644 --- a/src/libcretonne/isa/riscv/mod.rs +++ b/src/libcretonne/isa/riscv/mod.rs @@ -1,16 +1,19 @@ //! RISC-V Instruction Set Architecture. pub mod settings; +mod encoding; use super::super::settings as shared_settings; +use isa::encoding as shared_encoding; use super::Builder as IsaBuilder; use super::{TargetIsa, Encoding}; -use ir::{Inst, DataFlowGraph}; +use ir::{InstructionData, DataFlowGraph}; #[allow(dead_code)] struct Isa { shared_flags: shared_settings::Flags, isa_flags: settings::Flags, + cpumode: &'static [shared_encoding::Level1Entry], } pub fn isa_builder() -> IsaBuilder { @@ -23,14 +26,30 @@ pub fn isa_builder() -> IsaBuilder { fn isa_constructor(shared_flags: shared_settings::Flags, builder: shared_settings::Builder) -> Box { + let level1 = if shared_flags.is_64bit() { + &encoding::LEVEL1_RV64[..] + } else { + &encoding::LEVEL1_RV32[..] + }; Box::new(Isa { isa_flags: settings::Flags::new(&shared_flags, builder), shared_flags: shared_flags, + cpumode: level1, }) } impl TargetIsa for Isa { - fn encode(&self, _: &DataFlowGraph, _: &Inst) -> Option { - unimplemented!() + fn encode(&self, _: &DataFlowGraph, inst: &InstructionData) -> Option { + shared_encoding::lookup_enclist(inst.first_type(), + inst.opcode(), + self.cpumode, + &encoding::LEVEL2[..]) + .and_then(|enclist_offset| { + shared_encoding::general_encoding(enclist_offset, + &encoding::ENCLISTS[..], + |instp| encoding::check_instp(inst, instp), + // TODO: Implement ISA predicates properly. + |isap| isap != 17) + }) } }