components: Improve heuristic for splitting adapters (#4827)

This commit is a (second?) attempt at improving the generation of
adapter modules to avoid excessively large functions for fuzz-generated
inputs.

The first iteration of adapters simply translated an entire type inline in
each function. This proved problematic, however, since the size of the
adapter function was on the order of the overall size of a type, which can
be exponential for a type whose definition is otherwise only linear in
size.
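
For illustration (this is a standalone sketch, not code from this commit): a
type section where each `tN` is defined as `tuple<t(N-1), t(N-1)>` grows
linearly in the number of definitions, yet flattening `tN` expands to 2^N
leaf fields, so translating it fully inline produces an exponentially large
adapter.

    // Standalone illustration of the blowup; none of this is wasmtime code.
    //
    //   t0 = u32
    //   t1 = tuple<t0, t0>
    //   t2 = tuple<t1, t1>
    //   ...
    //   tN = tuple<t(N-1), t(N-1)>
    //
    // N + 1 definitions in the type section, but 2^N leaf fields once inlined.
    fn flat_field_count(n: u32) -> u64 {
        // each `tuple<t, t>` layer doubles the number of leaf fields
        1u64 << n
    }

    fn main() {
        assert_eq!(flat_field_count(0), 1);
        assert_eq!(flat_field_count(20), 1_048_576); // already over a million fields
    }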

The second iteration of adapters performed a split where memory-based types
would always be translated with individual functions. The theory there was
that once a type was memory-based it was large enough that it didn't warrant
inline translation in the original function, and a separate outlined
function could be shared and otherwise used to deduplicate portions of the
original giant function. This again proved problematic, however, since the
splitting heuristic was quite naive and didn't take large stack-based types
into account.

The third iteration, in this commit, replaces the previous system with a
similar but slightly more general one. Each adapter function now has a
concept of fuel which is decremented each time a layer of a type is
translated. When fuel runs out, further translations are deferred to
outlined functions. The fuel counter should provide a reasonable upper bound
on the size of a function, and the outlined functions can be called from
multiple places, deduplicating what would otherwise be a massive function.
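
A rough standalone sketch of that idea (hypothetical names and types; this
is not the implementation in this commit):

    // Each adapter gets a budget of fuel; every layer of a type translated
    // inline costs one unit, and once the budget is exhausted the remaining
    // work is deferred to an outlined helper function. `Type`, `Adapter`, and
    // `FUEL_PER_ADAPTER` are all hypothetical stand-ins.
    #[derive(Clone)]
    enum Type {
        U32,
        Tuple(Vec<Type>),
    }

    const FUEL_PER_ADAPTER: usize = 100;

    struct Adapter {
        fuel: usize,
        outlined: Vec<Type>, // helpers queued for separate code generation
    }

    impl Adapter {
        fn new() -> Adapter {
            Adapter {
                fuel: FUEL_PER_ADAPTER,
                outlined: Vec::new(),
            }
        }

        fn translate(&mut self, ty: &Type) {
            if self.fuel == 0 {
                // Out of fuel: stop growing this function and emit a call to
                // an outlined helper that translates `ty` instead.
                self.outline(ty);
                return;
            }
            self.fuel -= 1;
            match ty {
                Type::U32 => { /* emit the inline load/store instructions */ }
                Type::Tuple(fields) => {
                    // Recursion draws from the same fuel counter, bounding the
                    // total amount of inline translation in this adapter.
                    for field in fields {
                        self.translate(field);
                    }
                }
            }
        }

        fn outline(&mut self, ty: &Type) {
            // A real implementation would deduplicate helpers by a key such as
            // (type, options, location); here we just record that one is needed.
            self.outlined.push(ty.clone());
        }
    }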

This final iteration is another attempt at guaranteeing that an adapter
module is linear in size with respect to the type section of the original
module. Additionally, this iteration handles stack-based and memory-based
translations uniformly, which means that stack-based translations can no
longer balloon in function size and memory-based translations may benefit
slightly from a small amount of internal inlining.

The immediate impact of this is that the `component_api` fuzzer appears to
run at a faster rate than before. Otherwise #4825 is sufficient to
invalidate preexisting fuzz bugs, and this PR is hopefully the final nail in
the coffin, preventing further timeouts from cropping up for small inputs.

Closes #4816
Author: Alex Crichton
Date: 2022-08-31 12:09:45 -05:00 (committed by GitHub)
Commit: 99c6d7c083 (parent fb8b9838fe)
3 changed files with 393 additions and 195 deletions

@@ -20,7 +20,7 @@
 use crate::component::dfg::CoreDef;
 use crate::component::{
-    Adapter, AdapterOptions as AdapterOptionsDfg, ComponentTypesBuilder, InterfaceType,
+    Adapter, AdapterOptions as AdapterOptionsDfg, ComponentTypesBuilder, FlatType, InterfaceType,
     StringEncoding, TypeFuncIndex,
 };
 use crate::fact::transcode::Transcoder;
@@ -65,8 +65,8 @@ pub struct Module<'a> {
     imported_globals: PrimaryMap<GlobalIndex, CoreDef>,
     funcs: PrimaryMap<FunctionId, Function>,
-    translate_mem_funcs: HashMap<(InterfaceType, InterfaceType, Options, Options), FunctionId>,
-    translate_mem_worklist: Vec<(FunctionId, InterfaceType, InterfaceType, Options, Options)>,
+    helper_funcs: HashMap<Helper, FunctionId>,
+    helper_worklist: Vec<(FunctionId, Helper)>,
 }

 struct AdapterData {
@@ -123,6 +123,43 @@ enum Context {
     Lower,
 }

+/// Representation of a "helper function" which may be generated as part of
+/// generating an adapter trampoline.
+///
+/// Helper functions are created when inlining the translation for a type in its
+/// entirety would make a function excessively large. This is currently done via
+/// a simple fuel/cost heuristic based on the type being translated but may get
+/// fancier over time.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+struct Helper {
+    /// Metadata about the source type of what's being translated.
+    src: HelperType,
+    /// Metadata about the destination type which is being translated to.
+    dst: HelperType,
+}
+
+/// Information about a source or destination type in a `Helper` which is
+/// generated.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+struct HelperType {
+    /// The concrete type being translated.
+    ty: InterfaceType,
+    /// The configuration options (memory, etc) for the adapter.
+    opts: Options,
+    /// Where the type is located (either the stack or in memory)
+    loc: HelperLocation,
+}
+
+/// Where a `HelperType` is located, dictating the signature of the helper
+/// function.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+enum HelperLocation {
+    /// Located on the stack in wasm locals.
+    Stack,
+    /// Located in linear memory as configured by `opts`.
+    Memory,
+}
+
 impl<'a> Module<'a> {
     /// Creates an empty module.
     pub fn new(types: &'a ComponentTypesBuilder, debug: bool) -> Module<'a> {
@@ -138,8 +175,8 @@ impl<'a> Module<'a> {
             imported_memories: PrimaryMap::new(),
             imported_globals: PrimaryMap::new(),
             funcs: PrimaryMap::new(),
-            translate_mem_funcs: HashMap::new(),
-            translate_mem_worklist: Vec::new(),
+            helper_funcs: HashMap::new(),
+            helper_worklist: Vec::new(),
         }
     }
@@ -188,8 +225,8 @@ impl<'a> Module<'a> {
             },
         );
-        while let Some((result, src, dst, src_opts, dst_opts)) = self.translate_mem_worklist.pop() {
-            trampoline::compile_translate_mem(self, result, src, &src_opts, dst, &dst_opts);
+        while let Some((result, helper)) = self.helper_worklist.pop() {
+            trampoline::compile_helper(self, result, helper);
         }
     }
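
One note on the drain loop above: compiling a helper can presumably run out
of fuel itself and request further helpers, so the `while let ... pop()`
loop keeps running until the worklist is transitively empty. A minimal
standalone sketch of that memoize-and-drain pattern, with hypothetical
stand-in types:

    use std::collections::HashMap;

    // Hypothetical stand-ins for the real `Helper` and `FunctionId` types.
    type Helper = u32;
    type FunctionId = usize;

    struct Helpers {
        funcs: Vec<Option<Helper>>,                 // compiled bodies, indexed by id
        helper_funcs: HashMap<Helper, FunctionId>,  // dedup: one function per helper
        helper_worklist: Vec<(FunctionId, Helper)>, // allocated but not yet compiled
    }

    impl Helpers {
        // Reserve a function id for `helper`, reusing an existing one if the
        // same helper was requested before.
        fn translate_helper(&mut self, helper: Helper) -> FunctionId {
            if let Some(id) = self.helper_funcs.get(&helper) {
                return *id;
            }
            let id = self.funcs.len();
            self.funcs.push(None);
            self.helper_funcs.insert(helper, id);
            self.helper_worklist.push((id, helper));
            id
        }

        // Drain the worklist; compiling one helper may call `translate_helper`
        // again for nested types, which is why this loops rather than iterating
        // over a fixed list.
        fn compile_all(&mut self) {
            while let Some((id, helper)) = self.helper_worklist.pop() {
                self.funcs[id] = Some(helper); // stand-in for real compilation
            }
        }
    }
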
@@ -321,27 +358,15 @@ impl<'a> Module<'a> {
         })
     }

-    fn translate_mem(
-        &mut self,
-        src: InterfaceType,
-        src_opts: &Options,
-        dst: InterfaceType,
-        dst_opts: &Options,
-    ) -> FunctionId {
-        *self
-            .translate_mem_funcs
-            .entry((src, dst, *src_opts, *dst_opts))
-            .or_insert_with(|| {
-                // Generate a fresh `Function` with a unique id for what we're about to
-                // generate.
-                let ty = self
-                    .core_types
-                    .function(&[src_opts.ptr(), dst_opts.ptr()], &[]);
-                let id = self.funcs.push(Function::new(None, ty));
-                self.translate_mem_worklist
-                    .push((id, src, dst, *src_opts, *dst_opts));
-                id
-            })
+    fn translate_helper(&mut self, helper: Helper) -> FunctionId {
+        *self.helper_funcs.entry(helper).or_insert_with(|| {
+            // Generate a fresh `Function` with a unique id for what we're about to
+            // generate.
+            let ty = helper.core_type(self.types, &mut self.core_types);
+            let id = self.funcs.push(Function::new(None, ty));
+            self.helper_worklist.push((id, helper));
+            id
+        })
     }

     /// Encodes this module into a WebAssembly binary.
@@ -462,6 +487,19 @@ impl Options {
             4
         }
     }
+
+    fn flat_types<'a>(
+        &self,
+        ty: &InterfaceType,
+        types: &'a ComponentTypesBuilder,
+    ) -> Option<&'a [FlatType]> {
+        let flat = types.flat_types(ty)?;
+        Some(if self.memory64 {
+            flat.memory64
+        } else {
+            flat.memory32
+        })
+    }
 }

 /// Temporary index which is not the same as `FuncIndex`.
@@ -542,3 +580,43 @@ impl Function {
         }
     }
 }
+
+impl Helper {
+    fn core_type(
+        &self,
+        types: &ComponentTypesBuilder,
+        core_types: &mut core_types::CoreTypes,
+    ) -> u32 {
+        let mut params = Vec::new();
+        let mut results = Vec::new();
+        // The source type being translated is always pushed onto the
+        // parameters first, either a pointer for memory or its flat
+        // representation.
+        self.src.push_flat(&mut params, types);
+
+        // The destination type goes into the parameter list if it's from
+        // memory or otherwise is the result of the function itself for a
+        // stack-based representation.
+        match self.dst.loc {
+            HelperLocation::Stack => self.dst.push_flat(&mut results, types),
+            HelperLocation::Memory => params.push(self.dst.opts.ptr()),
+        }
+
+        core_types.function(&params, &results)
+    }
+}
+
+impl HelperType {
+    fn push_flat(&self, dst: &mut Vec<ValType>, types: &ComponentTypesBuilder) {
+        match self.loc {
+            HelperLocation::Stack => {
+                for ty in self.opts.flat_types(&self.ty, types).unwrap() {
+                    dst.push((*ty).into());
+                }
+            }
+            HelperLocation::Memory => {
+                dst.push(self.opts.ptr());
+            }
+        }
+    }
+}
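
As a rough illustration of the signatures `Helper::core_type` produces (a
hypothetical standalone sketch, assuming 32-bit pointers and a type that
flattens to two `i32`s; not wasmtime code):

    #[derive(Clone, Copy)]
    enum Loc {
        Stack,
        Memory,
    }

    // Mirrors the shape of the logic above: the source always contributes
    // parameters (its flat types on the stack, or one pointer into memory),
    // and the destination is either returned flat or written through an
    // extra pointer parameter.
    fn helper_signature(src: Loc, dst: Loc) -> (Vec<&'static str>, Vec<&'static str>) {
        let flat = ["i32", "i32"]; // assumed flat representation of the type
        let ptr = "i32"; // assumed 32-bit pointer

        let mut params = Vec::new();
        let mut results = Vec::new();
        match src {
            Loc::Stack => params.extend(flat),
            Loc::Memory => params.push(ptr),
        }
        match dst {
            Loc::Stack => results.extend(flat),
            Loc::Memory => params.push(ptr),
        }
        (params, results)
    }

    fn main() {
        // stack -> memory: (param i32 i32 i32)
        let (params, results) = helper_signature(Loc::Stack, Loc::Memory);
        assert_eq!(params, ["i32", "i32", "i32"]);
        assert!(results.is_empty());

        // memory -> stack: (param i32) (result i32 i32)
        let (params, results) = helper_signature(Loc::Memory, Loc::Stack);
        assert_eq!(params, ["i32"]);
        assert_eq!(results, ["i32", "i32"]);
    }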