diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index ad497188ec..6148b2ffad 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1420,27 +1420,24 @@ pub(crate) fn define(
     // or 1.
     //
     // Encode movzbq as movzbl, because it's equivalent and shorter.
-    e.enc32(
-        bint.bind(I32).bind(B1),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
-
-    e.enc64(
-        bint.bind(I64).bind(B1),
-        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
-    );
-    e.enc64(
-        bint.bind(I64).bind(B1),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
-    e.enc64(
-        bint.bind(I32).bind(B1),
-        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
-    );
-    e.enc64(
-        bint.bind(I32).bind(B1),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
+    for &to in &[I8, I16, I32, I64] {
+        for &from in &[B1, B8] {
+            e.enc64(
+                bint.bind(to).bind(from),
+                rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
+            );
+            e.enc64(
+                bint.bind(to).bind(from),
+                rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+            );
+            if to != I64 {
+                e.enc32(
+                    bint.bind(to).bind(from),
+                    rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+                );
+            }
+        }
+    }

     // Numerical conversions.

diff --git a/cranelift/codegen/src/abi.rs b/cranelift/codegen/src/abi.rs
index 0dc571bba7..f3591c1730 100644
--- a/cranelift/codegen/src/abi.rs
+++ b/cranelift/codegen/src/abi.rs
@@ -4,6 +4,7 @@
 //! `TargetIsa::legalize_signature()` method.

 use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
+use alloc::borrow::Cow;
 use alloc::vec::Vec;
 use core::cmp::Ordering;

@@ -86,7 +87,9 @@ pub trait ArgAssigner {
 /// Legalize the arguments in `args` using the given argument assigner.
 ///
 /// This function can be used for both arguments and return values.
-pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
+pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<Vec<AbiParam>> {
+    let mut args = Cow::Borrowed(args);
+
     // Iterate over the arguments.
     // We may need to mutate the vector in place, so don't use a normal iterator, and clone the
     // argument to avoid holding a reference.
@@ -102,20 +105,25 @@ pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
         match aa.assign(&arg) {
             // Assign argument to a location and move on to the next one.
             ArgAction::Assign(loc) => {
-                args[argno].location = loc;
+                args.to_mut()[argno].location = loc;
                 argno += 1;
             }

             // Split this argument into two smaller ones. Then revisit both.
             ArgAction::Convert(conv) => {
                 let value_type = conv.apply(arg.value_type);
                 let new_arg = AbiParam { value_type, ..arg };
-                args[argno].value_type = value_type;
+                args.to_mut()[argno].value_type = value_type;
                 if conv.is_split() {
-                    args.insert(argno + 1, new_arg);
+                    args.to_mut().insert(argno + 1, new_arg);
                 }
             }
         }
     }
+
+    match args {
+        Cow::Borrowed(_) => None,
+        Cow::Owned(a) => Some(a),
+    }
 }

 /// Determine the right action to take when passing a `have` value type to a call signature where
diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs
index 831ffca575..49f6fdbd73 100644
--- a/cranelift/codegen/src/ir/dfg.rs
+++ b/cranelift/codegen/src/ir/dfg.rs
@@ -62,6 +62,9 @@ pub struct DataFlowGraph {
     /// well as the external function references.
     pub signatures: PrimaryMap<SigRef, Signature>,

+    /// The pre-legalization signature for each entry in `signatures`, if any.
+    pub old_signatures: SecondaryMap<SigRef, Option<Signature>>,
+
     /// External function references. These are functions that can be called directly.
     pub ext_funcs: PrimaryMap<FuncRef, ExtFuncData>,

@@ -85,6 +88,7 @@ impl DataFlowGraph {
             value_lists: ValueListPool::new(),
             values: PrimaryMap::new(),
             signatures: PrimaryMap::new(),
+            old_signatures: SecondaryMap::new(),
             ext_funcs: PrimaryMap::new(),
             values_labels: None,
             constants: ConstantPool::new(),
diff --git a/cranelift/codegen/src/ir/extfunc.rs b/cranelift/codegen/src/ir/extfunc.rs
index 76b249da4a..9274efe9b9 100644
--- a/cranelift/codegen/src/ir/extfunc.rs
+++ b/cranelift/codegen/src/ir/extfunc.rs
@@ -55,6 +55,53 @@ impl Signature {
     pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
         self.params.iter().rposition(|arg| arg.purpose == purpose)
     }
+
+    /// Find the index of a presumed unique special-purpose return value.
+    pub fn special_return_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
+        self.returns.iter().rposition(|arg| arg.purpose == purpose)
+    }
+
+    /// Does this signature have a parameter whose `ArgumentPurpose` is
+    /// `purpose`?
+    pub fn uses_special_param(&self, purpose: ArgumentPurpose) -> bool {
+        self.special_param_index(purpose).is_some()
+    }
+
+    /// Does this signature have a return whose `ArgumentPurpose` is `purpose`?
+    pub fn uses_special_return(&self, purpose: ArgumentPurpose) -> bool {
+        self.special_return_index(purpose).is_some()
+    }
+
+    /// How many special parameters does this function have?
+    pub fn num_special_params(&self) -> usize {
+        self.params
+            .iter()
+            .filter(|p| p.purpose != ArgumentPurpose::Normal)
+            .count()
+    }
+
+    /// How many special returns does this function have?
+    pub fn num_special_returns(&self) -> usize {
+        self.returns
+            .iter()
+            .filter(|r| r.purpose != ArgumentPurpose::Normal)
+            .count()
+    }
+
+    /// Does this signature take a struct return pointer parameter?
+    pub fn uses_struct_return_param(&self) -> bool {
+        self.uses_special_param(ArgumentPurpose::StructReturn)
+    }
+
+    /// Does this signature return more than one normal value? (Before
+    /// struct-return legalization.)
+    pub fn is_multi_return(&self) -> bool {
+        self.returns
+            .iter()
+            .filter(|r| r.purpose == ArgumentPurpose::Normal)
+            .count()
+            > 1
+    }
 }

 /// Wrapper type capable of displaying a `Signature` with correct register names.
diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs
index 913c2d7be4..5318871689 100644
--- a/cranelift/codegen/src/ir/function.rs
+++ b/cranelift/codegen/src/ir/function.rs
@@ -34,6 +34,10 @@ pub struct Function {
     /// Signature of this function.
     pub signature: Signature,

+    /// The old signature of this function, before the most recent legalization,
+    /// if any.
+    pub old_signature: Option<Signature>,
+
     /// Stack slots allocated in this function.
     pub stack_slots: StackSlots,

@@ -96,6 +100,7 @@ impl Function {
         Self {
             name,
             signature: sig,
+            old_signature: None,
             stack_slots: StackSlots::new(),
             global_values: PrimaryMap::new(),
             heaps: PrimaryMap::new(),
diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs
index 00dee43af4..6a4edd0da6 100644
--- a/cranelift/codegen/src/ir/stackslot.rs
+++ b/cranelift/codegen/src/ir/stackslot.rs
@@ -64,6 +64,15 @@ pub enum StackSlotKind {
     /// stack slots are only valid while setting up a call.
     OutgoingArg,

+    /// Space allocated in the caller's frame for the callee's return values
+    /// that are passed out via return pointer.
+ /// + /// If there are more return values than registers available for the callee's calling + /// convention, or the return value is larger than the available registers' space, then we + /// allocate stack space in this frame and pass a pointer to the callee, which then writes its + /// return values into this space. + StructReturnSlot, + /// An emergency spill slot. /// /// Emergency slots are allocated late when the register's constraint solver needs extra space @@ -81,6 +90,7 @@ impl FromStr for StackSlotKind { "spill_slot" => Ok(SpillSlot), "incoming_arg" => Ok(IncomingArg), "outgoing_arg" => Ok(OutgoingArg), + "sret_slot" => Ok(StructReturnSlot), "emergency_slot" => Ok(EmergencySlot), _ => Err(()), } @@ -95,6 +105,7 @@ impl fmt::Display for StackSlotKind { SpillSlot => "spill_slot", IncomingArg => "incoming_arg", OutgoingArg => "outgoing_arg", + StructReturnSlot => "sret_slot", EmergencySlot => "emergency_slot", }) } diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index 894c67ecb5..85dc8d8f43 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -6,6 +6,7 @@ use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, Type}; use crate::isa::RegClass; use crate::regalloc::RegisterSet; +use alloc::borrow::Cow; use core::i32; use target_lexicon::Triple; @@ -78,11 +79,13 @@ impl ArgAssigner for Args { } /// Legalize `sig`. -pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bool) { +pub fn legalize_signature(sig: &mut Cow, triple: &Triple, _current: bool) { let bits = triple.pointer_width().unwrap().bits(); let mut args = Args::new(bits); - legalize_args(&mut sig.params, &mut args); + if let Some(new_params) = legalize_args(&sig.params, &mut args) { + sig.to_mut().params = new_params; + } } /// Get register class for a type appearing in a legalized signature. diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index 6c00ef9089..0358a70113 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -15,6 +15,7 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding use crate::isa::Builder as IsaBuilder; use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; +use alloc::borrow::Cow; use alloc::boxed::Box; use core::fmt; use target_lexicon::{Architecture, Triple}; @@ -100,7 +101,7 @@ impl TargetIsa for Isa { ) } - fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) { + fn legalize_signature(&self, sig: &mut Cow, current: bool) { abi::legalize_signature(sig, &self.triple, current) } diff --git a/cranelift/codegen/src/isa/arm64/abi.rs b/cranelift/codegen/src/isa/arm64/abi.rs index efc1f6125d..8d486d4193 100644 --- a/cranelift/codegen/src/isa/arm64/abi.rs +++ b/cranelift/codegen/src/isa/arm64/abi.rs @@ -5,10 +5,11 @@ use crate::ir; use crate::isa::RegClass; use crate::regalloc::RegisterSet; use crate::settings as shared_settings; +use alloc::borrow::Cow; /// Legalize `sig`. 
pub fn legalize_signature( - _sig: &mut ir::Signature, + _sig: &mut Cow, _flags: &shared_settings::Flags, _current: bool, ) { diff --git a/cranelift/codegen/src/isa/arm64/mod.rs b/cranelift/codegen/src/isa/arm64/mod.rs index 26c28329bc..f00062b2af 100644 --- a/cranelift/codegen/src/isa/arm64/mod.rs +++ b/cranelift/codegen/src/isa/arm64/mod.rs @@ -15,6 +15,7 @@ use crate::isa::enc_tables::{lookup_enclist, Encodings}; use crate::isa::Builder as IsaBuilder; use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; +use alloc::borrow::Cow; use alloc::boxed::Box; use core::fmt; use target_lexicon::Triple; @@ -88,7 +89,7 @@ impl TargetIsa for Isa { ) } - fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) { + fn legalize_signature(&self, sig: &mut Cow, current: bool) { abi::legalize_signature(sig, &self.shared_flags, current) } diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 15f9d59c60..aaa802b935 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -63,6 +63,7 @@ use crate::result::CodegenResult; use crate::settings; use crate::settings::SetResult; use crate::timing; +use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt; @@ -315,7 +316,7 @@ pub trait TargetIsa: fmt::Display + Sync { /// Arguments and return values for the caller's frame pointer and other callee-saved registers /// should not be added by this function. These arguments are not added until after register /// allocation. - fn legalize_signature(&self, sig: &mut ir::Signature, current: bool); + fn legalize_signature(&self, sig: &mut Cow, current: bool); /// Get the register class that should be used to represent an ABI argument or return value of /// type `ty`. This should be the top-level register class that contains the argument diff --git a/cranelift/codegen/src/isa/riscv/abi.rs b/cranelift/codegen/src/isa/riscv/abi.rs index 59b266f7ca..44c5f36afe 100644 --- a/cranelift/codegen/src/isa/riscv/abi.rs +++ b/cranelift/codegen/src/isa/riscv/abi.rs @@ -11,6 +11,7 @@ use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type}; use crate::isa::RegClass; use crate::regalloc::RegisterSet; +use alloc::borrow::Cow; use core::i32; use target_lexicon::Triple; @@ -88,7 +89,7 @@ impl ArgAssigner for Args { /// Legalize `sig` for RISC-V. pub fn legalize_signature( - sig: &mut ir::Signature, + sig: &mut Cow, triple: &Triple, isa_flags: &settings::Flags, current: bool, @@ -96,10 +97,14 @@ pub fn legalize_signature( let bits = triple.pointer_width().unwrap().bits(); let mut args = Args::new(bits, isa_flags.enable_e()); - legalize_args(&mut sig.params, &mut args); + if let Some(new_params) = legalize_args(&sig.params, &mut args) { + sig.to_mut().params = new_params; + } let mut rets = Args::new(bits, isa_flags.enable_e()); - legalize_args(&mut sig.returns, &mut rets); + if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { + sig.to_mut().returns = new_returns; + } if current { let ptr = Type::int(u16::from(bits)).unwrap(); @@ -110,8 +115,8 @@ pub fn legalize_signature( // in any register, but a micro-architecture with a return address predictor will only // recognize it as a return if the address is in `x1`. 
let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1)); - sig.params.push(link); - sig.returns.push(link); + sig.to_mut().params.push(link); + sig.to_mut().returns.push(link); } } diff --git a/cranelift/codegen/src/isa/riscv/mod.rs b/cranelift/codegen/src/isa/riscv/mod.rs index 79aaeaddf9..25244fab81 100644 --- a/cranelift/codegen/src/isa/riscv/mod.rs +++ b/cranelift/codegen/src/isa/riscv/mod.rs @@ -15,6 +15,7 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding use crate::isa::Builder as IsaBuilder; use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; +use alloc::borrow::Cow; use alloc::boxed::Box; use core::fmt; use target_lexicon::{PointerWidth, Triple}; @@ -95,7 +96,7 @@ impl TargetIsa for Isa { ) } - fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) { + fn legalize_signature(&self, sig: &mut Cow, current: bool) { abi::legalize_signature(sig, &self.triple, &self.isa_flags, current) } diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index d78af9b448..6d09635949 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -17,6 +17,7 @@ use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa}; use crate::regalloc::RegisterSet; use crate::result::CodegenResult; use crate::stack_layout::layout_stack; +use alloc::borrow::Cow; use alloc::vec::Vec; use core::i32; use target_lexicon::{PointerWidth, Triple}; @@ -166,9 +167,117 @@ impl ArgAssigner for Args { } } +/// Get the number of general-purpose and floating-point registers required to +/// hold the given `AbiParam` returns. +fn num_return_registers_required<'a>( + word_bit_size: u8, + call_conv: CallConv, + shared_flags: &shared_settings::Flags, + isa_flags: &isa_settings::Flags, + return_params: impl IntoIterator, +) -> (usize, usize) { + // Pretend we have "infinite" registers to give out, since we aren't + // actually assigning `AbiParam`s to registers yet, just seeing how many + // registers we would need in order to fit all the `AbiParam`s in registers. + let gprs = &[RU::rax; 128]; + let fpr_limit = std::usize::MAX; + + let mut assigner = Args::new( + word_bit_size, + gprs, + fpr_limit, + call_conv, + shared_flags, + isa_flags, + ); + + let mut gprs_required = 0; + let mut fprs_required = 0; + + for param in return_params { + match param.location { + ArgumentLoc::Unassigned => { + // Let this fall through so that we assign it a location and + // account for how many registers it ends up requiring below... + } + ArgumentLoc::Reg(_) => { + // This is already assigned to a register. Count it. + if param.value_type.is_float() { + fprs_required += 1; + } else { + gprs_required += 1; + } + continue; + } + _ => { + // It is already assigned, but not to a register. Skip it. + continue; + } + } + + // We're going to mutate the type as it gets converted, so make our own + // copy that isn't visible to the outside world. 
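+ //
+ // As an illustration (hypothetical, not taken from a test in this patch): an i128
+ // return on a 64-bit target would be split by the loop below via `IntSplit` into two
+ // i64 halves, so `split_factor` becomes 2 and that single return value is counted as
+ // two GPRs.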
+ let mut param = param.clone(); + + let mut split_factor = 1; + + loop { + match assigner.assign(¶m) { + ArgAction::Convert(ValueConversion::IntSplit) => { + split_factor *= 2; + param.value_type = param.value_type.half_width().unwrap(); + } + ArgAction::Convert(ValueConversion::VectorSplit) => { + split_factor *= 2; + param.value_type = param.value_type.half_vector().unwrap(); + } + ArgAction::Assign(ArgumentLoc::Reg(_)) + | ArgAction::Convert(ValueConversion::IntBits) + | ArgAction::Convert(ValueConversion::Sext(_)) + | ArgAction::Convert(ValueConversion::Uext(_)) => { + // Ok! We can fit this (potentially split) value into a + // register! Add the number of params we split the parameter + // into to our current counts. + if param.value_type.is_float() { + fprs_required += split_factor; + } else { + gprs_required += split_factor; + } + + // But we also have to call `assign` once for each split value, to + // update `assigner`'s internal state. + for _ in 1..split_factor { + match assigner.assign(¶m) { + ArgAction::Assign(_) + | ArgAction::Convert(ValueConversion::IntBits) + | ArgAction::Convert(ValueConversion::Sext(_)) + | ArgAction::Convert(ValueConversion::Uext(_)) => { + continue; + } + otherwise => panic!( + "unexpected action after first split succeeded: {:?}", + otherwise + ), + } + } + + // Continue to the next param. + break; + } + ArgAction::Assign(loc) => panic!( + "unexpected location assignment, should have had enough registers: {:?}", + loc + ), + } + } + } + + (gprs_required, fprs_required) +} + /// Legalize `sig`. pub fn legalize_signature( - sig: &mut ir::Signature, + sig: &mut Cow, triple: &Triple, _current: bool, shared_flags: &shared_settings::Flags, @@ -207,9 +316,7 @@ pub fn legalize_signature( } } - legalize_args(&mut sig.params, &mut args); - - let (regs, fpr_limit) = if sig.call_conv.extends_windows_fastcall() { + let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() { // windows-x64 calling convention only uses XMM0 or RAX for return values (&RET_GPRS_WIN_FASTCALL_X64[..], 1) } else { @@ -218,13 +325,77 @@ pub fn legalize_signature( let mut rets = Args::new( bits, - regs, - fpr_limit, + ret_regs, + ret_fpr_limit, sig.call_conv, shared_flags, isa_flags, ); - legalize_args(&mut sig.returns, &mut rets); + + if sig.is_multi_return() && { + // Even if it is multi-return, see if the return values will fit into + // our available return registers. + let (gprs_required, fprs_required) = num_return_registers_required( + bits, + sig.call_conv, + shared_flags, + isa_flags, + &sig.returns, + ); + gprs_required > ret_regs.len() || fprs_required > ret_fpr_limit + } { + debug_assert!(!sig.uses_struct_return_param()); + + // We're using the first register for the return pointer parameter. + let mut ret_ptr_param = AbiParam { + value_type: args.pointer_type, + purpose: ArgumentPurpose::StructReturn, + extension: ArgumentExtension::None, + location: ArgumentLoc::Unassigned, + }; + match args.assign(&ret_ptr_param) { + ArgAction::Assign(ArgumentLoc::Reg(reg)) => { + ret_ptr_param.location = ArgumentLoc::Reg(reg); + sig.to_mut().params.push(ret_ptr_param); + } + _ => unreachable!("return pointer should always get a register assignment"), + } + + // We're using the first return register for the return pointer (like + // sys v does). 
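+ //
+ // Returning the pointer back out in a register means callers can reuse the returned
+ // value when loading the results, instead of keeping the original stack address live
+ // across the call (see `legalize_sret_call` in the boundary legalizer).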
+ let mut ret_ptr_return = AbiParam { + value_type: args.pointer_type, + purpose: ArgumentPurpose::StructReturn, + extension: ArgumentExtension::None, + location: ArgumentLoc::Unassigned, + }; + match rets.assign(&ret_ptr_return) { + ArgAction::Assign(ArgumentLoc::Reg(reg)) => { + ret_ptr_return.location = ArgumentLoc::Reg(reg); + sig.to_mut().returns.push(ret_ptr_return); + } + _ => unreachable!("return pointer should always get a register assignment"), + } + + sig.to_mut().returns.retain(|ret| { + // Either this is the return pointer, in which case we want to keep + // it, or else assume that it is assigned for a reason and doesn't + // conflict with our return pointering legalization. + debug_assert_eq!( + ret.location.is_assigned(), + ret.purpose != ArgumentPurpose::Normal + ); + ret.location.is_assigned() + }); + } + + if let Some(new_params) = legalize_args(&sig.params, &mut args) { + sig.to_mut().params = new_params; + } + + if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { + sig.to_mut().returns = new_returns; + } } /// Get register class for a type appearing in a legalized signature. diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs index cfba3fef48..e2e785675f 100644 --- a/cranelift/codegen/src/isa/x86/mod.rs +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -18,6 +18,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; use crate::result::CodegenResult; use crate::timing; +use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt; @@ -107,7 +108,7 @@ impl TargetIsa for Isa { ) } - fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) { + fn legalize_signature(&self, sig: &mut Cow, current: bool) { abi::legalize_signature( sig, &self.triple, diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs index 479208751f..5063b6d910 100644 --- a/cranelift/codegen/src/legalizer/boundary.rs +++ b/cranelift/codegen/src/legalizer/boundary.rs @@ -23,11 +23,14 @@ use crate::flowgraph::ControlFlowGraph; use crate::ir::instructions::CallInfo; use crate::ir::{ AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder, - SigRef, Signature, Type, Value, ValueLoc, + MemFlags, SigRef, Signature, StackSlotData, StackSlotKind, Type, Value, ValueLoc, }; use crate::isa::TargetIsa; use crate::legalizer::split::{isplit, vsplit}; +use alloc::borrow::Cow; use alloc::vec::Vec; +use core::mem; +use cranelift_entity::EntityList; use log::debug; /// Legalize all the function signatures in `func`. @@ -36,9 +39,16 @@ use log::debug; /// change the entry block arguments, calls, or return instructions, so this can leave the function /// in a state with type discrepancies. 
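///
/// When a signature does change, its pre-legalization form is stashed away: in
/// `func.old_signature` for the current function, and in `func.dfg.old_signatures` for call
/// signatures. The later call and return legalization uses those old signatures to recover
/// the original multi-value returns.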
pub fn legalize_signatures(func: &mut Function, isa: &dyn TargetIsa) { - legalize_signature(&mut func.signature, true, isa); - for sig_data in func.dfg.signatures.values_mut() { - legalize_signature(sig_data, false, isa); + if let Some(new) = legalize_signature(&func.signature, true, isa) { + let old = mem::replace(&mut func.signature, new); + func.old_signature = Some(old); + } + + for (sig_ref, sig_data) in func.dfg.signatures.iter_mut() { + if let Some(new) = legalize_signature(sig_data, false, isa) { + let old = mem::replace(sig_data, new); + func.dfg.old_signatures[sig_ref] = Some(old); + } } if let Some(entry) = func.layout.entry_block() { @@ -50,14 +60,25 @@ pub fn legalize_signatures(func: &mut Function, isa: &dyn TargetIsa) { /// Legalize the libcall signature, which we may generate on the fly after /// `legalize_signatures` has been called. pub fn legalize_libcall_signature(signature: &mut Signature, isa: &dyn TargetIsa) { - legalize_signature(signature, false, isa); + if let Some(s) = legalize_signature(signature, false, isa) { + *signature = s; + } } /// Legalize the given signature. /// /// `current` is true if this is the signature for the current function. -fn legalize_signature(signature: &mut Signature, current: bool, isa: &dyn TargetIsa) { - isa.legalize_signature(signature, current); +fn legalize_signature( + signature: &Signature, + current: bool, + isa: &dyn TargetIsa, +) -> Option { + let mut cow = Cow::Borrowed(signature); + isa.legalize_signature(&mut cow, current); + match cow { + Cow::Borrowed(_) => None, + Cow::Owned(s) => Some(s), + } } /// Legalize the entry block parameters after `func`'s signature has been legalized. @@ -245,6 +266,166 @@ where call } +fn assert_is_valid_sret_legalization( + old_ret_list: &EntityList, + old_sig: &Signature, + new_sig: &Signature, + pos: &FuncCursor, +) { + debug_assert_eq!( + old_sig.returns.len(), + old_ret_list.len(&pos.func.dfg.value_lists) + ); + + // Assert that the only difference in special parameters is that there + // is an appended struct return pointer parameter. + let old_special_params: Vec<_> = old_sig + .params + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + let new_special_params: Vec<_> = new_sig + .params + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + debug_assert_eq!(old_special_params.len() + 1, new_special_params.len()); + debug_assert!(old_special_params + .iter() + .zip(&new_special_params) + .all(|(old, new)| old.purpose == new.purpose)); + debug_assert_eq!( + new_special_params.last().unwrap().purpose, + ArgumentPurpose::StructReturn + ); + + // If the special returns have changed at all, then the only change + // should be that the struct return pointer is returned back out of the + // function, so that callers don't have to load its stack address again. 
+ let old_special_returns: Vec<_> = old_sig + .returns + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + let new_special_returns: Vec<_> = new_sig + .returns + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + debug_assert!(old_special_returns + .iter() + .zip(&new_special_returns) + .all(|(old, new)| old.purpose == new.purpose)); + debug_assert!( + old_special_returns.len() == new_special_returns.len() + || (old_special_returns.len() + 1 == new_special_returns.len() + && new_special_returns.last().unwrap().purpose == ArgumentPurpose::StructReturn) + ); +} + +fn legalize_sret_call(isa: &dyn TargetIsa, pos: &mut FuncCursor, sig_ref: SigRef, call: Inst) { + let old_ret_list = pos.func.dfg.detach_results(call); + let old_sig = pos.func.dfg.old_signatures[sig_ref] + .take() + .expect("must have an old signature when using an `sret` parameter"); + + // We make a bunch of assumptions about the shape of the old, multi-return + // signature and the new, sret-using signature in this legalization + // function. Assert that these assumptions hold true in debug mode. + if cfg!(debug_assertions) { + assert_is_valid_sret_legalization( + &old_ret_list, + &old_sig, + &pos.func.dfg.signatures[sig_ref], + &pos, + ); + } + + // Go through and remove all normal return values from the `call` + // instruction's returns list. These will be stored into the stack slot that + // the sret points to. At the same time, calculate the size of the sret + // stack slot. + let mut sret_slot_size = 0; + for (i, ret) in old_sig.returns.iter().enumerate() { + let v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); + let ty = pos.func.dfg.value_type(v); + if ret.purpose == ArgumentPurpose::Normal { + debug_assert_eq!(ret.location, ArgumentLoc::Unassigned); + let ty = legalized_type_for_sret(ty); + let size = ty.bytes(); + sret_slot_size = round_up_to_multiple_of_type_align(sret_slot_size, ty) + size; + } else { + let new_v = pos.func.dfg.append_result(call, ty); + pos.func.dfg.change_to_alias(v, new_v); + } + } + + let stack_slot = pos.func.stack_slots.push(StackSlotData { + kind: StackSlotKind::StructReturnSlot, + size: sret_slot_size, + offset: None, + }); + + // Append the sret pointer to the `call` instruction's arguments. + let ptr_type = Type::triple_pointer_type(isa.triple()); + let sret_arg = pos.ins().stack_addr(ptr_type, stack_slot, 0); + pos.func.dfg.append_inst_arg(call, sret_arg); + + // The sret pointer might be returned by the signature as well. If so, we + // need to add it to the `call` instruction's results list. + // + // Additionally, when the sret is explicitly returned in this calling + // convention, then use it when loading the sret returns back into ssa + // values to avoid keeping the original `sret_arg` live and potentially + // having to do spills and fills. + let sret = + if pos.func.dfg.signatures[sig_ref].uses_special_return(ArgumentPurpose::StructReturn) { + pos.func.dfg.append_result(call, ptr_type) + } else { + sret_arg + }; + + // Finally, load each of the call's return values out of the sret stack + // slot. 
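+ //
+ // These loads must mirror the stores emitted for the callee's `return` in
+ // `handle_return_abi`: both sides walk the normal returns in order, legalize each type
+ // the same way, and round the running offset up with
+ // `round_up_to_multiple_of_type_align`, so caller and callee agree on the layout of the
+ // sret space.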
+ pos.goto_after_inst(call); + let mut offset = 0; + for i in 0..old_ret_list.len(&pos.func.dfg.value_lists) { + if old_sig.returns[i].purpose != ArgumentPurpose::Normal { + continue; + } + + let old_v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); + let ty = pos.func.dfg.value_type(old_v); + let mut legalized_ty = legalized_type_for_sret(ty); + + offset = round_up_to_multiple_of_type_align(offset, legalized_ty); + + let new_legalized_v = + pos.ins() + .load(legalized_ty, MemFlags::trusted(), sret, offset as i32); + + // "Illegalize" the loaded value from the legalized type back to its + // original `ty`. This is basically the opposite of + // `legalize_type_for_sret_store`. + let mut new_v = new_legalized_v; + if ty.is_bool() { + legalized_ty = legalized_ty.as_bool_pedantic(); + new_v = pos.ins().raw_bitcast(legalized_ty, new_v); + + if ty.bits() < legalized_ty.bits() { + legalized_ty = ty; + new_v = pos.ins().breduce(legalized_ty, new_v); + } + } + + pos.func.dfg.change_to_alias(old_v, new_v); + + offset += legalized_ty.bytes(); + } + + pos.func.dfg.old_signatures[sig_ref] = Some(old_sig); +} + /// Compute original value of type `ty` from the legalized ABI arguments. /// /// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an @@ -452,6 +633,13 @@ fn legalize_inst_arguments( .constraints() .num_fixed_value_arguments(); let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values; + if abi_args < have_args { + // This happens with multiple return values after we've legalized the + // signature but haven't legalized the return instruction yet. This + // legalization is handled in `handle_return_abi`. + pos.func.dfg[inst].put_value_list(vlist); + return; + } // Grow the value list to the right size and shift all the existing arguments to the right. // This lets us write the new argument values into the list without overwriting the old @@ -508,6 +696,32 @@ fn legalize_inst_arguments( pos.func.dfg[inst].put_value_list(vlist); } +/// Ensure that the `ty` being returned is a type that can be loaded and stored +/// (potentially after another narrowing legalization) from memory, since it +/// will go into the `sret` space. +fn legalized_type_for_sret(ty: Type) -> Type { + if ty.is_bool() { + let bits = std::cmp::max(8, ty.bits()); + Type::int(bits).unwrap() + } else { + ty + } +} + +/// Insert any legalization code required to ensure that `val` can be stored +/// into the `sret` memory. Returns the (potentially new, potentially +/// unmodified) legalized value and its type. +fn legalize_type_for_sret_store(pos: &mut FuncCursor, val: Value, ty: Type) -> (Value, Type) { + if ty.is_bool() { + let bits = std::cmp::max(8, ty.bits()); + let ty = Type::int(bits).unwrap(); + let val = pos.ins().bint(ty, val); + (val, ty) + } else { + (val, ty) + } +} + /// Insert ABI conversion code before and after the call instruction at `pos`. /// /// Instructions inserted before the call will compute the appropriate ABI values for the @@ -518,7 +732,12 @@ fn legalize_inst_arguments( /// original return values. The call's result values will be adapted to match the new signature. /// /// Returns `true` if any instructions were inserted. 
-pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool { +pub fn handle_call_abi( + isa: &dyn TargetIsa, + mut inst: Inst, + func: &mut Function, + cfg: &ControlFlowGraph, +) -> bool { let pos = &mut FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -528,16 +747,27 @@ pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGra Err(s) => s, }; - // OK, we need to fix the call arguments to match the ABI signature. - let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - func.dfg.signatures[sig_ref].params[abi_arg] - }); + let sig = &pos.func.dfg.signatures[sig_ref]; + let old_sig = &pos.func.dfg.old_signatures[sig_ref]; - if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { - inst = legalize_inst_results(pos, |func, abi_res| { - func.dfg.signatures[sig_ref].returns[abi_res] + if sig.uses_struct_return_param() + && old_sig + .as_ref() + .map_or(false, |s| !s.uses_struct_return_param()) + { + legalize_sret_call(isa, pos, sig_ref, inst); + } else { + // OK, we need to fix the call arguments to match the ABI signature. + let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); + legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { + func.dfg.signatures[sig_ref].params[abi_arg] }); + + if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { + inst = legalize_inst_results(pos, |func, abi_res| { + func.dfg.signatures[sig_ref].returns[abi_res] + }); + } } debug_assert!( @@ -586,8 +816,6 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { func.signature.returns[abi_arg] }); - debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args); - // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to // the legalized signature. These values should simply be propagated from the entry block // arguments. @@ -598,6 +826,8 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph pos.func.dfg.display_inst(inst, None) ); let mut vlist = pos.func.dfg[inst].take_value_list().unwrap(); + let mut sret = None; + for arg in &pos.func.signature.returns[abi_args..] { match arg.purpose { ArgumentPurpose::Link @@ -624,10 +854,45 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph .ebb_params(pos.func.layout.entry_block().unwrap())[idx]; debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type); vlist.push(val, &mut pos.func.dfg.value_lists); + + if let ArgumentPurpose::StructReturn = arg.purpose { + sret = Some(val); + } + } + + // Store all the regular returns into the retptr space and remove them + // from the `return` instruction's value list. + if let Some(sret) = sret { + let mut offset = 0; + let num_regular_rets = vlist.len(&pos.func.dfg.value_lists) - special_args; + for i in 0..num_regular_rets { + debug_assert_eq!( + pos.func.old_signature.as_ref().unwrap().returns[i].purpose, + ArgumentPurpose::Normal, + ); + + // The next return value to process is always at `0`, since the + // list is emptied as we iterate. 
+ let v = vlist.get(0, &pos.func.dfg.value_lists).unwrap(); + let ty = pos.func.dfg.value_type(v); + let (v, ty) = legalize_type_for_sret_store(pos, v, ty); + + let size = ty.bytes(); + offset = round_up_to_multiple_of_type_align(offset, ty); + + pos.ins().store(MemFlags::trusted(), v, sret, offset as i32); + vlist.remove(0, &mut pos.func.dfg.value_lists); + + offset += size; + } } pos.func.dfg[inst].put_value_list(vlist); } + debug_assert_eq!( + pos.func.dfg.inst_variable_args(inst).len(), + abi_args + special_args + ); debug_assert!( check_return_signature(&pos.func.dfg, inst, &pos.func.signature), "Signature still wrong: {} / signature {}", @@ -639,6 +904,56 @@ pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph true } +fn round_up_to_multiple_of_type_align(bytes: u32, ty: Type) -> u32 { + // We don't have a dedicated alignment for types, so assume they are + // size-aligned. + let align = ty.bytes(); + round_up_to_multiple_of_pow2(bytes, align) +} + +/// Round `n` up to the next multiple of `to` that is greater than or equal to +/// `n`. +/// +/// `to` must be a power of two and greater than zero. +/// +/// This is useful for rounding an offset or pointer up to some type's required +/// alignment. +fn round_up_to_multiple_of_pow2(n: u32, to: u32) -> u32 { + debug_assert!(to > 0); + debug_assert!(to.is_power_of_two()); + + // The simple version of this function is + // + // (n + to - 1) / to * to + // + // Consider the numerator: `n + to - 1`. This is ensuring that if there is + // any remainder for `n / to`, then the result of the division is one + // greater than `n / to`, and that otherwise we get exactly the same result + // as `n / to` due to integer division rounding off the remainder. In other + // words, we only round up if `n` is not aligned to `to`. + // + // However, we know `to` is a power of two, and therefore `anything / to` is + // equivalent to `anything >> log2(to)` and `anything * to` is equivalent to + // `anything << log2(to)`. We can therefore rewrite our simplified function + // into the following: + // + // (n + to - 1) >> log2(to) << log2(to) + // + // But shifting a value right by some number of bits `b` and then shifting + // it left by that same number of bits `b` is equivalent to clearing the + // bottom `b` bits of the number. We can clear the bottom `b` bits of a + // number by bit-wise and'ing the number with the bit-wise not of `2^b - 1`. + // Plugging this into our function and simplifying, we get: + // + // (n + to - 1) >> log2(to) << log2(to) + // = (n + to - 1) & !(2^log2(to) - 1) + // = (n + to - 1) & !(to - 1) + // + // And now we have the final version of this function! + + (n + to - 1) & !(to - 1) +} + /// Assign stack slots to incoming function parameters on the stack. /// /// Values that are passed into the function on the stack must be assigned to an `IncomingArg` @@ -714,3 +1029,34 @@ fn spill_call_arguments(pos: &mut FuncCursor) -> bool { // We changed stuff. 
true } + +#[cfg(test)] +mod tests { + use super::round_up_to_multiple_of_pow2; + + #[test] + fn round_up_to_multiple_of_pow2_works() { + for (n, to, expected) in vec![ + (0, 1, 0), + (1, 1, 1), + (2, 1, 2), + (0, 2, 0), + (1, 2, 2), + (2, 2, 2), + (3, 2, 4), + (0, 4, 0), + (1, 4, 4), + (2, 4, 4), + (3, 4, 4), + (4, 4, 4), + (5, 4, 8), + ] { + let actual = round_up_to_multiple_of_pow2(n, to); + assert_eq!( + actual, expected, + "round_up_to_multiple_of_pow2(n = {}, to = {}) = {} (expected {})", + n, to, actual, expected + ); + } + } +} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 0d6a6c0bb9..dcebff7ce4 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -55,7 +55,7 @@ fn legalize_inst( // Check for ABI boundaries that need to be converted to the legalized signature. if opcode.is_call() { - if boundary::handle_call_abi(inst, pos.func, cfg) { + if boundary::handle_call_abi(isa, inst, pos.func, cfg) { return LegalizeInstResult::Legalized; } } else if opcode.is_return() { diff --git a/cranelift/codegen/src/stack_layout.rs b/cranelift/codegen/src/stack_layout.rs index 0f5c8ae639..732f9365c0 100644 --- a/cranelift/codegen/src/stack_layout.rs +++ b/cranelift/codegen/src/stack_layout.rs @@ -25,7 +25,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu // stack layout from high to low addresses will be: // // 1. incoming arguments. - // 2. spills + explicits. + // 2. spills + explicits + struct returns. // 3. outgoing arguments. // // The incoming arguments can have both positive and negative offsets. A negative offset @@ -56,7 +56,8 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu .ok_or(CodegenError::ImplLimitExceeded)?; outgoing_max = max(outgoing_max, offset); } - StackSlotKind::SpillSlot + StackSlotKind::StructReturnSlot + | StackSlotKind::SpillSlot | StackSlotKind::ExplicitSlot | StackSlotKind::EmergencySlot => { // Determine the smallest alignment of any explicit or spill slot. @@ -65,9 +66,9 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu } } - // Lay out spill slots and explicit slots below the incoming arguments. - // The offset is negative, growing downwards. - // Start with the smallest alignments for better packing. + // Lay out spill slots, struct return slots, and explicit slots below the + // incoming arguments. The offset is negative, growing downwards. Start with + // the smallest alignments for better packing. let mut offset = incoming_min; debug_assert!(min_align.is_power_of_two()); while min_align <= alignment { @@ -75,6 +76,7 @@ pub fn layout_stack(frame: &mut StackSlots, alignment: StackSize) -> CodegenResu // Pick out explicit and spill slots with exact alignment `min_align`. match slot.kind { StackSlotKind::SpillSlot + | StackSlotKind::StructReturnSlot | StackSlotKind::ExplicitSlot | StackSlotKind::EmergencySlot => { if slot.alignment(alignment) != min_align { diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif new file mode 100644 index 0000000000..582403dcfb --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-b1.clif @@ -0,0 +1,68 @@ +test compile +target x86_64 haswell + +;; `b1` return values need to be legalized into bytes so that they can be stored +;; in memory. 
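+;;
+;; Concretely (as the checks below verify): on the callee side each `b1` becomes
+;; `bint.i8` + `uextend.i32` + `istore8` into the sret space, and on the caller side it is
+;; read back with `uload8` + `ireduce.i8` + `raw_bitcast.b8` + `breduce.b1`.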
+ +function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 { +;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast { + +ebb0(v0: b1, v1: b1, v2: b1, v3: b1): +; check: ebb0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]): + + return v0, v1, v2, v3 + ; check: v5 = bint.i8 v0 + ; nextln: v9 = uextend.i32 v5 + ; nextln: istore8 notrap aligned v9, v4 + ; nextln: v6 = bint.i8 v1 + ; nextln: v10 = uextend.i32 v6 + ; nextln: istore8 notrap aligned v10, v4+1 + ; nextln: v7 = bint.i8 v2 + ; nextln: v11 = uextend.i32 v7 + ; nextln: istore8 notrap aligned v11, v4+2 + ; nextln: v8 = bint.i8 v3 + ; nextln: v12 = uextend.i32 v8 + ; nextln: istore8 notrap aligned v12, v4+3 +} + +function %call_4_b1s() { +; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast { +; nextln: ss0 = sret_slot 4, offset -28 + + fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 + ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast + +ebb0: +; check: ebb0(v26: i64 [%rbp], v27: i64 [%rbx]): + + v0 = bconst.b1 true + v1 = bconst.b1 false + v2 = bconst.b1 true + v3 = bconst.b1 false + + ; check: v8 = stack_addr.i64 ss0 + v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) + ; check: v9 = call fn0(v0, v1, v2, v3, v8) + ; nextln: v22 = uload8.i32 notrap aligned v9 + ; nextln: v10 = ireduce.i8 v22 + ; nextln: v11 = raw_bitcast.b8 v10 + ; nextln: v12 = breduce.b1 v11 + ; nextln: v4 -> v12 + ; nextln: v23 = uload8.i32 notrap aligned v9+1 + ; nextln: v13 = ireduce.i8 v23 + ; nextln: v14 = raw_bitcast.b8 v13 + ; nextln: v15 = breduce.b1 v14 + ; nextln: v5 -> v15 + ; nextln: v24 = uload8.i32 notrap aligned v9+2 + ; nextln: v16 = ireduce.i8 v24 + ; nextln: v17 = raw_bitcast.b8 v16 + ; nextln: v18 = breduce.b1 v17 + ; nextln: v6 -> v18 + ; nextln: v25 = uload8.i32 notrap aligned v9+3 + ; nextln: v19 = ireduce.i8 v25 + ; nextln: v20 = raw_bitcast.b8 v19 + ; nextln: v21 = breduce.b1 v20 + ; nextln: v7 -> v21 + + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif new file mode 100644 index 0000000000..b102d652cf --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif @@ -0,0 +1,26 @@ +test legalizer +target x86_64 haswell + +;; Indirect calls with many returns. 
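+;;
+;; The legalization is the same as for direct calls: a `sret_slot` is allocated in the
+;; caller, its address is passed as the extra `sret` argument, and each result is loaded
+;; back out of the returned pointer (see the checks below).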
+ +function %call_indirect_many_rets(i64) { + ; check: ss0 = sret_slot 32 + + sig0 = () -> i64, i64, i64, i64 + ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast + +ebb0(v0: i64): + v1, v2, v3, v4 = call_indirect sig0, v0() + ; check: v5 = stack_addr.i64 ss0 + ; nextln: v6 = call_indirect sig0, v0(v5) + ; nextln: v7 = load.i64 notrap aligned v6 + ; nextln: v1 -> v7 + ; nextln: v8 = load.i64 notrap aligned v6+8 + ; nextln: v2 -> v8 + ; nextln: v9 = load.i64 notrap aligned v6+16 + ; nextln: v3 -> v9 + ; nextln: v10 = load.i64 notrap aligned v6+24 + ; nextln: v4 -> v10 + + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-f32.clif b/cranelift/filetests/filetests/wasm/multi-val-f32.clif new file mode 100644 index 0000000000..9f3d0047cd --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-f32.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many f32s + +function %return_2_f32s() -> f32, f32 { +ebb0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + return v0, v1 +} + +function %return_3_f32s() -> f32, f32, f32 { +ebb0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + v2 = f32const 0x2.0 + return v0, v1, v2 +} + +function %return_4_f32s() -> f32, f32, f32, f32 { +ebb0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + v2 = f32const 0x2.0 + v3 = f32const 0x3.0 + return v0, v1, v2, v3 +} + +;; Calling functions that return many f32s + +function %call() -> f32 { + fn0 = %a() -> f32, f32 + fn1 = %b(f32, f32) -> f32, f32, f32 + fn2 = %c(f32, f32, f32) -> f32, f32, f32, f32 +ebb0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = fadd v5, v6 + v10 = fadd v7, v8 + v11 = fadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-f64.clif b/cranelift/filetests/filetests/wasm/multi-val-f64.clif new file mode 100644 index 0000000000..aa7e263eba --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-f64.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many f64s + +function %return_2_f64s() -> f64, f64 { +ebb0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + return v0, v1 +} + +function %return_3_f64s() -> f64, f64, f64 { +ebb0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + return v0, v1, v2 +} + +function %return_4_f64s() -> f64, f64, f64, f64 { +ebb0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + return v0, v1, v2, v3 +} + +;; Calling functions that return many f64s + +function %call() -> f64 { + fn0 = %a() -> f64, f64 + fn1 = %b(f64, f64) -> f64, f64, f64 + fn2 = %c(f64, f64, f64) -> f64, f64, f64, f64 +ebb0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = fadd v5, v6 + v10 = fadd v7, v8 + v11 = fadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-i32.clif b/cranelift/filetests/filetests/wasm/multi-val-i32.clif new file mode 100644 index 0000000000..924fcb4bc6 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-i32.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many i32s + +function %return_2_i32s() -> i32, i32 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + return v0, v1 +} + +function %return_3_i32s() -> i32, i32, i32 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + return v0, v1, v2 +} + +function %return_4_i32s() -> i32, i32, i32, i32 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + 
return v0, v1, v2, v3 +} + +;; Calling functions that return many i32s + +function %call() -> i32 { + fn0 = %a() -> i32, i32 + fn1 = %b(i32, i32) -> i32, i32, i32 + fn2 = %c(i32, i32, i32) -> i32, i32, i32, i32 +ebb0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = iadd v5, v6 + v10 = iadd v7, v8 + v11 = iadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-i64.clif b/cranelift/filetests/filetests/wasm/multi-val-i64.clif new file mode 100644 index 0000000000..f5ab392693 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-i64.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many i64s + +function %return_2_i64s() -> i64, i64 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + return v0, v1 +} + +function %return_3_i64s() -> i64, i64, i64 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + v2 = iconst.i64 2 + return v0, v1, v2 +} + +function %return_4_i64s() -> i64, i64, i64, i64 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + v2 = iconst.i64 2 + v3 = iconst.i64 3 + return v0, v1, v2, v3 +} + +;; Calling functions that return many i64s + +function %call() -> i64 { + fn0 = %a() -> i64, i64 + fn1 = %b(i64, i64) -> i64, i64, i64 + fn2 = %c(i64, i64, i64) -> i64, i64, i64, i64 +ebb0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = iadd v5, v6 + v10 = iadd v7, v8 + v11 = iadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-mixed.clif b/cranelift/filetests/filetests/wasm/multi-val-mixed.clif new file mode 100644 index 0000000000..db66d202ff --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-mixed.clif @@ -0,0 +1,2098 @@ +test compile +target x86_64 haswell + +;; Returning many mixed values. +;; +;; This test was generated programmatically with this python script: +;; +;; ``` +;; from itertools import permutations +;; +;; def make_val(i, r): +;; val = None +;; op = None +;; if r == "f32": +;; val = "0x0.0" +;; op = "f32const" +;; elif r == "f64": +;; val = "0x0.0" +;; op = "f64const" +;; elif r == "i32": +;; val = "0" +;; op = "iconst.i32" +;; elif r == "i64": +;; val = "0" +;; op = "iconst.i64" +;; elif r == "b1": +;; val = "true" +;; op = "bconst.b1" +;; else: +;; raise Exception("bad r = " + str(r)) +;; return " v" + str(i) + " = " + op + " " + val +;; +;; def make_returner(results): +;; results = list(results) +;; head = "function %return_" + "_".join(results) + "() -> " + ", ".join(results) + " {\n" +;; ebb = "ebb0:\n" +;; vals = [make_val(i, r) for i, r in enumerate(results)] +;; ret = " return " + ", ".join(("v" + str(i) for i in range(0, len(results)))) +;; return head + ebb + "\n".join(vals) + "\n" + ret + "\n}\n" +;; +;; def make_caller(results): +;; results = list(results) +;; head = "function %call_" + "_".join(results) + "() {\n" +;; fn_decl = " fn0 = %foo() -> " + ",".join(results) + "\n" +;; ebb = "ebb0:\n" +;; ret_vars = ["v" + str(i) for i, r in enumerate(results)] +;; call = " " + ",".join(ret_vars) + " = call fn0()\n" +;; ret = " return\n" +;; tail = "}\n" +;; return head + fn_decl + ebb + call + ret + tail +;; +;; for results in permutations(["i32", "i64", "f32", "f64", "b1"]): +;; print make_returner(results) +;; print make_caller(results) +;; ``` +;; +;; If you're modifying this test, it is likely easier to modify the script and +;; regenerate the test. 
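+;;
+;; Each permutation mixes GPR-class returns (i32, i64, b1) with FPR-class returns (f32,
+;; f64) in a different order, so this compile test exercises the struct-return
+;; legalization over many different result layouts.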
+ +function %return_i32_i64_f32_f64_b1() -> i32, i64, f32, f64, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f32_f64_b1() { + fn0 = %foo() -> i32,i64,f32,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f32_b1_f64() -> i32, i64, f32, b1, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f32_b1_f64() { + fn0 = %foo() -> i32,i64,f32,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f64_f32_b1() -> i32, i64, f64, f32, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f64_f32_b1() { + fn0 = %foo() -> i32,i64,f64,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f64_b1_f32() -> i32, i64, f64, b1, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f64_b1_f32() { + fn0 = %foo() -> i32,i64,f64,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_b1_f32_f64() -> i32, i64, b1, f32, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_b1_f32_f64() { + fn0 = %foo() -> i32,i64,b1,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_b1_f64_f32() -> i32, i64, b1, f64, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_b1_f64_f32() { + fn0 = %foo() -> i32,i64,b1,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_i64_f64_b1() -> i32, f32, i64, f64, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_i64_f64_b1() { + fn0 = %foo() -> i32,f32,i64,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_i64_b1_f64() -> i32, f32, i64, b1, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_i64_b1_f64() { + fn0 = %foo() -> i32,f32,i64,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_f64_i64_b1() -> i32, f32, f64, i64, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_f64_i64_b1() { + fn0 = %foo() -> i32,f32,f64,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_f64_b1_i64() -> i32, f32, f64, b1, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_f64_b1_i64() { + fn0 = %foo() -> i32,f32,f64,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_b1_i64_f64() -> i32, f32, b1, i64, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = 
iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_b1_i64_f64() { + fn0 = %foo() -> i32,f32,b1,i64,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_b1_f64_i64() -> i32, f32, b1, f64, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_b1_f64_i64() { + fn0 = %foo() -> i32,f32,b1,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_i64_f32_b1() -> i32, f64, i64, f32, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_i64_f32_b1() { + fn0 = %foo() -> i32,f64,i64,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_i64_b1_f32() -> i32, f64, i64, b1, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_i64_b1_f32() { + fn0 = %foo() -> i32,f64,i64,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_f32_i64_b1() -> i32, f64, f32, i64, b1 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_f32_i64_b1() { + fn0 = %foo() -> i32,f64,f32,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_f32_b1_i64() -> i32, f64, f32, b1, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_f32_b1_i64() { + fn0 = %foo() -> i32,f64,f32,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_b1_i64_f32() -> i32, f64, b1, i64, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_b1_i64_f32() { + fn0 = %foo() -> i32,f64,b1,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_b1_f32_i64() -> i32, f64, b1, f32, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_b1_f32_i64() { + fn0 = %foo() -> i32,f64,b1,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_i64_f32_f64() -> i32, b1, i64, f32, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_i64_f32_f64() { + fn0 = %foo() -> i32,b1,i64,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_i64_f64_f32() -> i32, b1, i64, f64, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_i64_f64_f32() { + fn0 = %foo() -> i32,b1,i64,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f32_i64_f64() -> i32, b1, f32, i64, f64 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f32_i64_f64() { + fn0 = %foo() -> i32,b1,f32,i64,f64 +ebb0: + 
v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f32_f64_i64() -> i32, b1, f32, f64, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f32_f64_i64() { + fn0 = %foo() -> i32,b1,f32,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f64_i64_f32() -> i32, b1, f64, i64, f32 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f64_i64_f32() { + fn0 = %foo() -> i32,b1,f64,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f64_f32_i64() -> i32, b1, f64, f32, i64 { +ebb0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f64_f32_i64() { + fn0 = %foo() -> i32,b1,f64,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f32_f64_b1() -> i64, i32, f32, f64, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f32_f64_b1() { + fn0 = %foo() -> i64,i32,f32,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f32_b1_f64() -> i64, i32, f32, b1, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f32_b1_f64() { + fn0 = %foo() -> i64,i32,f32,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f64_f32_b1() -> i64, i32, f64, f32, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f64_f32_b1() { + fn0 = %foo() -> i64,i32,f64,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f64_b1_f32() -> i64, i32, f64, b1, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f64_b1_f32() { + fn0 = %foo() -> i64,i32,f64,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_b1_f32_f64() -> i64, i32, b1, f32, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_b1_f32_f64() { + fn0 = %foo() -> i64,i32,b1,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_b1_f64_f32() -> i64, i32, b1, f64, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_b1_f64_f32() { + fn0 = %foo() -> i64,i32,b1,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_i32_f64_b1() -> i64, f32, i32, f64, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_i32_f64_b1() { + fn0 = %foo() -> i64,f32,i32,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_i32_b1_f64() -> i64, f32, i32, b1, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 
0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_i32_b1_f64() { + fn0 = %foo() -> i64,f32,i32,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_f64_i32_b1() -> i64, f32, f64, i32, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_f64_i32_b1() { + fn0 = %foo() -> i64,f32,f64,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_f64_b1_i32() -> i64, f32, f64, b1, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_f64_b1_i32() { + fn0 = %foo() -> i64,f32,f64,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_b1_i32_f64() -> i64, f32, b1, i32, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_b1_i32_f64() { + fn0 = %foo() -> i64,f32,b1,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_b1_f64_i32() -> i64, f32, b1, f64, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_b1_f64_i32() { + fn0 = %foo() -> i64,f32,b1,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_i32_f32_b1() -> i64, f64, i32, f32, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_i32_f32_b1() { + fn0 = %foo() -> i64,f64,i32,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_i32_b1_f32() -> i64, f64, i32, b1, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_i32_b1_f32() { + fn0 = %foo() -> i64,f64,i32,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_f32_i32_b1() -> i64, f64, f32, i32, b1 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_f32_i32_b1() { + fn0 = %foo() -> i64,f64,f32,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_f32_b1_i32() -> i64, f64, f32, b1, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_f32_b1_i32() { + fn0 = %foo() -> i64,f64,f32,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_b1_i32_f32() -> i64, f64, b1, i32, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_b1_i32_f32() { + fn0 = %foo() -> i64,f64,b1,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_b1_f32_i32() -> i64, f64, b1, f32, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_b1_f32_i32() { + fn0 = 
%foo() -> i64,f64,b1,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_i32_f32_f64() -> i64, b1, i32, f32, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_i32_f32_f64() { + fn0 = %foo() -> i64,b1,i32,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_i32_f64_f32() -> i64, b1, i32, f64, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_i32_f64_f32() { + fn0 = %foo() -> i64,b1,i32,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f32_i32_f64() -> i64, b1, f32, i32, f64 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f32_i32_f64() { + fn0 = %foo() -> i64,b1,f32,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f32_f64_i32() -> i64, b1, f32, f64, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f32_f64_i32() { + fn0 = %foo() -> i64,b1,f32,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f64_i32_f32() -> i64, b1, f64, i32, f32 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f64_i32_f32() { + fn0 = %foo() -> i64,b1,f64,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f64_f32_i32() -> i64, b1, f64, f32, i32 { +ebb0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f64_f32_i32() { + fn0 = %foo() -> i64,b1,f64,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_i64_f64_b1() -> f32, i32, i64, f64, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_i64_f64_b1() { + fn0 = %foo() -> f32,i32,i64,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_i64_b1_f64() -> f32, i32, i64, b1, f64 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_i64_b1_f64() { + fn0 = %foo() -> f32,i32,i64,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_f64_i64_b1() -> f32, i32, f64, i64, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_f64_i64_b1() { + fn0 = %foo() -> f32,i32,f64,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_f64_b1_i64() -> f32, i32, f64, b1, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_f64_b1_i64() { + fn0 = %foo() -> f32,i32,f64,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_b1_i64_f64() -> f32, i32, b1, i64, f64 { +ebb0: 
+ v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_b1_i64_f64() { + fn0 = %foo() -> f32,i32,b1,i64,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_b1_f64_i64() -> f32, i32, b1, f64, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_b1_f64_i64() { + fn0 = %foo() -> f32,i32,b1,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_i32_f64_b1() -> f32, i64, i32, f64, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_i32_f64_b1() { + fn0 = %foo() -> f32,i64,i32,f64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_i32_b1_f64() -> f32, i64, i32, b1, f64 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_i32_b1_f64() { + fn0 = %foo() -> f32,i64,i32,b1,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_f64_i32_b1() -> f32, i64, f64, i32, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_f64_i32_b1() { + fn0 = %foo() -> f32,i64,f64,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_f64_b1_i32() -> f32, i64, f64, b1, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_f64_b1_i32() { + fn0 = %foo() -> f32,i64,f64,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_b1_i32_f64() -> f32, i64, b1, i32, f64 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_b1_i32_f64() { + fn0 = %foo() -> f32,i64,b1,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_b1_f64_i32() -> f32, i64, b1, f64, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_b1_f64_i32() { + fn0 = %foo() -> f32,i64,b1,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i32_i64_b1() -> f32, f64, i32, i64, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i32_i64_b1() { + fn0 = %foo() -> f32,f64,i32,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i32_b1_i64() -> f32, f64, i32, b1, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i32_b1_i64() { + fn0 = %foo() -> f32,f64,i32,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i64_i32_b1() -> f32, f64, i64, i32, b1 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function 
%call_f32_f64_i64_i32_b1() { + fn0 = %foo() -> f32,f64,i64,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i64_b1_i32() -> f32, f64, i64, b1, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i64_b1_i32() { + fn0 = %foo() -> f32,f64,i64,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_b1_i32_i64() -> f32, f64, b1, i32, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_b1_i32_i64() { + fn0 = %foo() -> f32,f64,b1,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_b1_i64_i32() -> f32, f64, b1, i64, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_b1_i64_i32() { + fn0 = %foo() -> f32,f64,b1,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i32_i64_f64() -> f32, b1, i32, i64, f64 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i32_i64_f64() { + fn0 = %foo() -> f32,b1,i32,i64,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i32_f64_i64() -> f32, b1, i32, f64, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i32_f64_i64() { + fn0 = %foo() -> f32,b1,i32,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i64_i32_f64() -> f32, b1, i64, i32, f64 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i64_i32_f64() { + fn0 = %foo() -> f32,b1,i64,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i64_f64_i32() -> f32, b1, i64, f64, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i64_f64_i32() { + fn0 = %foo() -> f32,b1,i64,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_f64_i32_i64() -> f32, b1, f64, i32, i64 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_f64_i32_i64() { + fn0 = %foo() -> f32,b1,f64,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_f64_i64_i32() -> f32, b1, f64, i64, i32 { +ebb0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_f64_i64_i32() { + fn0 = %foo() -> f32,b1,f64,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_i64_f32_b1() -> f64, i32, i64, f32, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_i64_f32_b1() { + fn0 = %foo() -> f64,i32,i64,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function 
%return_f64_i32_i64_b1_f32() -> f64, i32, i64, b1, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_i64_b1_f32() { + fn0 = %foo() -> f64,i32,i64,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_f32_i64_b1() -> f64, i32, f32, i64, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_f32_i64_b1() { + fn0 = %foo() -> f64,i32,f32,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_f32_b1_i64() -> f64, i32, f32, b1, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_f32_b1_i64() { + fn0 = %foo() -> f64,i32,f32,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_b1_i64_f32() -> f64, i32, b1, i64, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_b1_i64_f32() { + fn0 = %foo() -> f64,i32,b1,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_b1_f32_i64() -> f64, i32, b1, f32, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_b1_f32_i64() { + fn0 = %foo() -> f64,i32,b1,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_i32_f32_b1() -> f64, i64, i32, f32, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_i32_f32_b1() { + fn0 = %foo() -> f64,i64,i32,f32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_i32_b1_f32() -> f64, i64, i32, b1, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_i32_b1_f32() { + fn0 = %foo() -> f64,i64,i32,b1,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_f32_i32_b1() -> f64, i64, f32, i32, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_f32_i32_b1() { + fn0 = %foo() -> f64,i64,f32,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_f32_b1_i32() -> f64, i64, f32, b1, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_f32_b1_i32() { + fn0 = %foo() -> f64,i64,f32,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_b1_i32_f32() -> f64, i64, b1, i32, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_b1_i32_f32() { + fn0 = %foo() -> f64,i64,b1,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_b1_f32_i32() -> f64, i64, b1, f32, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 
= iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_b1_f32_i32() { + fn0 = %foo() -> f64,i64,b1,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i32_i64_b1() -> f64, f32, i32, i64, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i32_i64_b1() { + fn0 = %foo() -> f64,f32,i32,i64,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i32_b1_i64() -> f64, f32, i32, b1, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i32_b1_i64() { + fn0 = %foo() -> f64,f32,i32,b1,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i64_i32_b1() -> f64, f32, i64, i32, b1 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i64_i32_b1() { + fn0 = %foo() -> f64,f32,i64,i32,b1 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i64_b1_i32() -> f64, f32, i64, b1, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i64_b1_i32() { + fn0 = %foo() -> f64,f32,i64,b1,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_b1_i32_i64() -> f64, f32, b1, i32, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_b1_i32_i64() { + fn0 = %foo() -> f64,f32,b1,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_b1_i64_i32() -> f64, f32, b1, i64, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_b1_i64_i32() { + fn0 = %foo() -> f64,f32,b1,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i32_i64_f32() -> f64, b1, i32, i64, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i32_i64_f32() { + fn0 = %foo() -> f64,b1,i32,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i32_f32_i64() -> f64, b1, i32, f32, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i32_f32_i64() { + fn0 = %foo() -> f64,b1,i32,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i64_i32_f32() -> f64, b1, i64, i32, f32 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i64_i32_f32() { + fn0 = %foo() -> f64,b1,i64,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i64_f32_i32() -> f64, b1, i64, f32, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i64_f32_i32() { + fn0 = %foo() -> f64,b1,i64,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call 
fn0() + return +} + +function %return_f64_b1_f32_i32_i64() -> f64, b1, f32, i32, i64 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_f32_i32_i64() { + fn0 = %foo() -> f64,b1,f32,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_f32_i64_i32() -> f64, b1, f32, i64, i32 { +ebb0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_f32_i64_i32() { + fn0 = %foo() -> f64,b1,f32,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_i64_f32_f64() -> b1, i32, i64, f32, f64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_i64_f32_f64() { + fn0 = %foo() -> b1,i32,i64,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_i64_f64_f32() -> b1, i32, i64, f64, f32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_i64_f64_f32() { + fn0 = %foo() -> b1,i32,i64,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f32_i64_f64() -> b1, i32, f32, i64, f64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f32_i64_f64() { + fn0 = %foo() -> b1,i32,f32,i64,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f32_f64_i64() -> b1, i32, f32, f64, i64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f32_f64_i64() { + fn0 = %foo() -> b1,i32,f32,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f64_i64_f32() -> b1, i32, f64, i64, f32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f64_i64_f32() { + fn0 = %foo() -> b1,i32,f64,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f64_f32_i64() -> b1, i32, f64, f32, i64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f64_f32_i64() { + fn0 = %foo() -> b1,i32,f64,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_i32_f32_f64() -> b1, i64, i32, f32, f64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_i32_f32_f64() { + fn0 = %foo() -> b1,i64,i32,f32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_i32_f64_f32() -> b1, i64, i32, f64, f32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_i32_f64_f32() { + fn0 = %foo() -> b1,i64,i32,f64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f32_i32_f64() -> b1, i64, f32, i32, f64 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f32const 
0x0.0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f32_i32_f64() { + fn0 = %foo() -> b1,i64,f32,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f32_f64_i32() -> b1, i64, f32, f64, i32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f32_f64_i32() { + fn0 = %foo() -> b1,i64,f32,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f64_i32_f32() -> b1, i64, f64, i32, f32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f64_i32_f32() { + fn0 = %foo() -> b1,i64,f64,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f64_f32_i32() -> b1, i64, f64, f32, i32 { +ebb0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f64_f32_i32() { + fn0 = %foo() -> b1,i64,f64,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i32_i64_f64() -> b1, f32, i32, i64, f64 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i32_i64_f64() { + fn0 = %foo() -> b1,f32,i32,i64,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i32_f64_i64() -> b1, f32, i32, f64, i64 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i32_f64_i64() { + fn0 = %foo() -> b1,f32,i32,f64,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i64_i32_f64() -> b1, f32, i64, i32, f64 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i64_i32_f64() { + fn0 = %foo() -> b1,f32,i64,i32,f64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i64_f64_i32() -> b1, f32, i64, f64, i32 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i64_f64_i32() { + fn0 = %foo() -> b1,f32,i64,f64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_f64_i32_i64() -> b1, f32, f64, i32, i64 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_f64_i32_i64() { + fn0 = %foo() -> b1,f32,f64,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_f64_i64_i32() -> b1, f32, f64, i64, i32 { +ebb0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_f64_i64_i32() { + fn0 = %foo() -> b1,f32,f64,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i32_i64_f32() -> b1, f64, i32, i64, f32 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i32_i64_f32() { + fn0 = %foo() -> 
b1,f64,i32,i64,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i32_f32_i64() -> b1, f64, i32, f32, i64 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i32_f32_i64() { + fn0 = %foo() -> b1,f64,i32,f32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i64_i32_f32() -> b1, f64, i64, i32, f32 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i64_i32_f32() { + fn0 = %foo() -> b1,f64,i64,i32,f32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i64_f32_i32() -> b1, f64, i64, f32, i32 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i64_f32_i32() { + fn0 = %foo() -> b1,f64,i64,f32,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_f32_i32_i64() -> b1, f64, f32, i32, i64 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_f32_i32_i64() { + fn0 = %foo() -> b1,f64,f32,i32,i64 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_f32_i64_i32() -> b1, f64, f32, i64, i32 { +ebb0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_f32_i64_i32() { + fn0 = %foo() -> b1,f64,f32,i64,i32 +ebb0: + v0,v1,v2,v3,v4 = call fn0() + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif new file mode 100644 index 0000000000..f7d0bf846c --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif @@ -0,0 +1,61 @@ +test legalizer +target x86_64 haswell + +;; Test that we don't reuse `sret` stack slots for multiple calls. We could do +;; this one day, but it would require some care to ensure that we don't have +;; subsequent calls overwrite the results of previous calls. 
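
To make the hazard described in the comment above concrete, here is a small standalone Rust sketch; it is not the legalizer's actual code, and the `SretSlot` and `alloc_sret_slots` names are invented for illustration. The idea it models: every multi-value call site gets its own return-area stack slot, because reusing one slot would only be sound if the earlier call's results were provably dead before the later call stores into the same memory.

// Standalone illustration, not Cranelift's internal API: allocate a distinct
// return-area stack slot for every multi-value call site. Reusing a single
// slot across calls is only sound when the earlier call's results are dead
// before the later call writes the same memory, which is not checked here.
#[derive(Debug, PartialEq)]
struct SretSlot {
    id: usize,
    size: u32,
}

fn alloc_sret_slots(call_return_area_sizes: &[u32]) -> Vec<SretSlot> {
    call_return_area_sizes
        .iter()
        .enumerate()
        .map(|(id, &size)| SretSlot { id, size })
        .collect()
}

fn main() {
    // Two calls that each return five 4-byte values, as in %foo below:
    // the expectation is two 20-byte slots (ss0 and ss1), never one shared slot.
    let slots = alloc_sret_slots(&[20, 20]);
    assert_eq!(slots.len(), 2);
    assert_eq!(slots[0].size, 20);
    assert_eq!(slots[1].size, 20);
}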
+
+function %foo() -> i32, f32 {
+    ; check: ss0 = sret_slot 20
+    ; nextln: ss1 = sret_slot 20
+
+    fn0 = %f() -> i32, i32, i32, i32, i32
+    fn1 = %g() -> f32, f32, f32, f32, f32
+    ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
+    ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
+    ; nextln: fn0 = %f sig0
+    ; nextln: fn1 = %g sig1
+
+ebb0:
+    v0, v1, v2, v3, v4 = call fn0()
+    ; check: v18 = stack_addr.i64 ss0
+    ; nextln: v25 = func_addr.i64 fn0
+    ; nextln: v19 = call_indirect sig0, v25(v18)
+    ; nextln: v20 = load.i32 notrap aligned v19
+    ; nextln: v0 -> v20
+    ; nextln: v21 = load.i32 notrap aligned v19+4
+    ; nextln: v1 -> v21
+    ; nextln: v22 = load.i32 notrap aligned v19+8
+    ; nextln: v2 -> v22
+    ; nextln: v23 = load.i32 notrap aligned v19+12
+    ; nextln: v3 -> v23
+    ; nextln: v24 = load.i32 notrap aligned v19+16
+    ; nextln: v4 -> v24
+
+    v5, v6, v7, v8, v9 = call fn1()
+    ; check: v26 = stack_addr.i64 ss1
+    ; nextln: v33 = func_addr.i64 fn1
+    ; nextln: v27 = call_indirect sig1, v33(v26)
+    ; nextln: v28 = load.f32 notrap aligned v27
+    ; nextln: v5 -> v28
+    ; nextln: v29 = load.f32 notrap aligned v27+4
+    ; nextln: v6 -> v29
+    ; nextln: v30 = load.f32 notrap aligned v27+8
+    ; nextln: v7 -> v30
+    ; nextln: v31 = load.f32 notrap aligned v27+12
+    ; nextln: v8 -> v31
+    ; nextln: v32 = load.f32 notrap aligned v27+16
+    ; nextln: v9 -> v32
+
+    v10 = iadd v0, v1
+    v11 = iadd v2, v3
+    v12 = iadd v10, v11
+    v13 = iadd v12, v4
+
+    v14 = fadd v5, v6
+    v15 = fadd v7, v8
+    v16 = fadd v14, v15
+    v17 = fadd v16, v9
+
+    return v13, v17
+}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif
new file mode 100644
index 0000000000..b6c74e314e
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif
@@ -0,0 +1,51 @@
+test legalizer
+target x86_64 haswell
+
+;; Need to insert padding after the `i8`s so that the `i32` and `i64` are
+;; aligned.
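
The expected offsets in the check lines below (i8 at 0, i32 at 4, i8 at 8, i64 at 16, in a 24-byte slot) follow from a naturally aligned running-offset rule. The Rust sketch below is a standalone illustration of that rule under the assumption that each value's alignment equals its size; the `layout` helper is invented for the example and is not the legalizer's actual code.

// Standalone sketch: compute naturally aligned offsets for return values
// stored through the sret pointer, plus the total slot size rounded up to
// the largest alignment seen.
fn layout(fields: &[(u32, u32)]) -> (Vec<u32>, u32) {
    // Each field is (size, alignment) in bytes; alignment must be a power of two.
    let mut offsets = Vec::with_capacity(fields.len());
    let mut offset = 0u32;
    let mut max_align = 1u32;
    for &(size, align) in fields {
        // Round the current offset up to this field's alignment.
        offset = (offset + align - 1) & !(align - 1);
        offsets.push(offset);
        offset += size;
        max_align = max_align.max(align);
    }
    let total = (offset + max_align - 1) & !(max_align - 1);
    (offsets, total)
}

fn main() {
    // i8, i32, i8, i64, as in %returner below: padding after each i8.
    let (offsets, size) = layout(&[(1, 1), (4, 4), (1, 1), (8, 8)]);
    assert_eq!(offsets, vec![0, 4, 8, 16]);
    assert_eq!(size, 24); // matches `ss0 = sret_slot 24` in %caller below
}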
+
+function %returner() -> i8, i32, i8, i64 {
+; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast {
+
+ebb0:
+; check: ebb0(v4: i64):
+
+    v0 = iconst.i8 0
+    v1 = iconst.i32 1
+    v2 = iconst.i8 2
+    v3 = iconst.i64 3
+    return v0, v1, v2, v3
+    ; check: v6 = uextend.i32 v0
+    ; nextln: istore8 notrap aligned v6, v4
+    ; nextln: store notrap aligned v1, v4+4
+    ; nextln: v7 = uextend.i32 v2
+    ; nextln: istore8 notrap aligned v7, v4+8
+    ; nextln: store notrap aligned v3, v4+16
+    ; nextln: return v4
+}
+
+function %caller() {
+    ; check: ss0 = sret_slot 24
+
+    fn0 = %returner() -> i8, i32, i8, i64
+    ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
+    ; nextln: fn0 = %returner sig0
+
+ebb0:
+    v0, v1, v2, v3 = call fn0()
+    ; check: v4 = stack_addr.i64 ss0
+    ; nextln: v10 = func_addr.i64 fn0
+    ; nextln: v5 = call_indirect sig0, v10(v4)
+    ; nextln: v11 = uload8.i32 notrap aligned v5
+    ; nextln: v6 = ireduce.i8 v11
+    ; nextln: v0 -> v6
+    ; nextln: v7 = load.i32 notrap aligned v5+4
+    ; nextln: v1 -> v7
+    ; nextln: v12 = uload8.i32 notrap aligned v5+8
+    ; nextln: v8 = ireduce.i8 v12
+    ; nextln: v2 -> v8
+    ; nextln: v9 = load.i64 notrap aligned v5+16
+    ; nextln: v3 -> v9
+
+    return
+}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif b/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif
new file mode 100644
index 0000000000..385cc9d27c
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif
@@ -0,0 +1,18 @@
+test compile
+target x86_64 haswell
+
+function %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 {
+ebb0(v0: i32, v1: i64, v2: f32, v3: f64):
+    return v0, v1, v2, v3
+}
+
+function %caller() {
+    fn0 = %returner(i32, i64, f32, f64) -> i32, i64, f32, f64
+ebb0:
+    v0 = iconst.i32 0
+    v1 = iconst.i64 1
+    v2 = f32const 0x2.0
+    v3 = f64const 0x3.0
+    v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
+    return
+}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif
new file mode 100644
index 0000000000..f19b1bcbf0
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif
@@ -0,0 +1,34 @@
+test compile
+target x86_64 haswell
+
+function %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 {
+ebb0:
+    v0 = iconst.i32 0
+    v1 = iconst.i32 1
+    v2 = iconst.i32 2
+    v3 = iconst.i32 3
+    v4 = iconst.i32 4
+    v5 = iconst.i32 5
+    v6 = iconst.i32 6
+    v7 = iconst.i32 7
+    v8 = iconst.i32 8
+    v9 = iconst.i32 9
+    v10 = iconst.i32 10
+    v11 = iconst.i32 11
+    v12 = iconst.i32 12
+    v13 = iconst.i32 13
+    v14 = iconst.i32 14
+    v15 = iconst.i32 15
+    v16 = iconst.i32 16
+    v17 = iconst.i32 17
+    v18 = iconst.i32 18
+    v19 = iconst.i32 19
+    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19
+}
+
+function %call_20_i32s() {
+    fn0 = %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
+ebb0:
+    v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19 = call fn0()
+    return
+}