diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index c072594b61..d0308f73e8 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -245,24 +245,17 @@ pub fn legalize_signature( isa_flags, ); - let sig_is_multi_return = sig.is_multi_return(); - - // If this is a multi-value return and we don't have enough available return - // registers to fit all of the return values, we need to backtrack and start + // If we don't have enough available return registers + // to fit all of the return values, we need to backtrack and start // assigning locations all over again with a different strategy. In order to // do that, we need a copy of the original assigner for the returns. - let backup_rets_for_struct_return = if sig_is_multi_return { - Some(rets.clone()) - } else { - None - }; + let mut backup_rets = rets.clone(); if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - if sig.is_multi_return() - && new_returns - .iter() - .filter(|r| r.purpose == ArgumentPurpose::Normal) - .any(|r| !r.location.is_reg()) + if new_returns + .iter() + .filter(|r| r.purpose == ArgumentPurpose::Normal) + .any(|r| !r.location.is_reg()) { // The return values couldn't all fit into available return // registers. Introduce the use of a struct-return parameter. @@ -283,8 +276,6 @@ pub fn legalize_signature( _ => unreachable!("return pointer should always get a register assignment"), } - let mut backup_rets = backup_rets_for_struct_return.unwrap(); - // We're using the first return register for the return pointer (like // sys v does). let mut ret_ptr_return = AbiParam { diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs index 7fb977a06a..185e4c74fa 100644 --- a/cranelift/codegen/src/legalizer/boundary.rs +++ b/cranelift/codegen/src/legalizer/boundary.rs @@ -757,12 +757,6 @@ pub fn handle_call_abi( { legalize_sret_call(isa, pos, sig_ref, inst); } else { - // OK, we need to fix the call arguments to match the ABI signature. - let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - func.dfg.signatures[sig_ref].params[abi_arg] - }); - if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { inst = legalize_inst_results(pos, |func, abi_res| { func.dfg.signatures[sig_ref].returns[abi_res] @@ -770,6 +764,13 @@ pub fn handle_call_abi( } } + // Go back and fix the call arguments to match the ABI signature. + pos.goto_inst(inst); + let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); + legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { + func.dfg.signatures[sig_ref].params[abi_arg] + }); + debug_assert!( check_call_signature(&pos.func.dfg, inst).is_ok(), "Signature still wrong: {}, {}{}", diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif index 9494e78c67..d99761a4dc 100644 --- a/cranelift/filetests/filetests/isa/x86/abi64.clif +++ b/cranelift/filetests/filetests/isa/x86/abi64.clif @@ -14,6 +14,12 @@ function %f() { sig2 = (f32, i64) -> f64 system_v ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v + sig3 = () -> i128 system_v + ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v + + sig4 = (i128) -> i128 system_v + ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v + block0: return } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif index 8e8d356479..cb1856ca3d 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif @@ -110,6 +110,13 @@ block0(v0: f32, v1: f64, v2: i64, v3: i64): ; nextln: return v1, v5 ; nextln: } +function %ret_val_i128(i64, i64) -> i128 windows_fastcall { +block0(v0: i64, v1: i64): + v2 = iconcat v0, v1 + return v2 +} +; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall { + function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall { fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif index f4919f4781..6d07f32631 100644 --- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif +++ b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif @@ -1,5 +1,5 @@ test legalizer -target i686 +target x86_64 haswell function %foo() -> i128 { block0: @@ -10,22 +10,12 @@ block0: return v4 } -; check: v5 = iconst.i32 66 -; check: v6 = iconst.i32 100 -; check: v1 = iconcat v5, v6 -; check: v7 = iconst.i32 0x1010_0042 -; check: v8 = iconst.i32 127 -; check: v2 = iconcat v7, v8 +; check: v1 = iconst.i64 0x0064_0000_0042 +; check: v2 = iconst.i64 0x007f_1010_0042 ; check: v3 = iconcat v1, v2 -; check: v9 = popcnt v1 -; check: v10 = popcnt v2 -; check: v12, v13 = isplit v9 -; check: v14, v15 = isplit v10 -; check: v16, v17 = iadd_ifcout v12, v14 -; check: v18 = iadd_ifcin v13, v15, v17 -; check: v11 = iconcat v16, v18 -; check: v20 = iconst.i32 0 -; check: v21 = iconst.i32 0 -; check: v19 = iconcat v20, v21 -; check: v4 = iconcat v11, v19 -; check: return v16, v18, v20, v21 +; check: v5 = popcnt v1 +; check: v6 = popcnt v2 +; check: v7 = iadd v5, v6 +; check: v8 = iconst.i64 0 +; check: v4 = iconcat v7, v8 +; check: return v7, v8 diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif new file mode 100644 index 0000000000..b57090d851 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif @@ -0,0 +1,24 @@ +test legalizer +target x86_64 haswell + +;; Test if arguments are legalized if function uses sret + +function %call_indirect_with_split_arg(i64, i64, i64) { + ; check: ss0 = sret_slot 32 + sig0 = (i128) -> i64, i64, i64, i64 + ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast +block0(v0: i64, v1: i64, v2: i64): + v3 = iconcat v1, v2 + v4, v5, v6, v7 = call_indirect sig0, v0(v3) + ; check: v8 = stack_addr.i64 ss0 + ; check: v9 = call_indirect sig0, v0(v1, v2, v8) + ; check: v10 = load.i64 notrap aligned v9 + ; check: v4 -> v10 + ; check: v11 = load.i64 notrap aligned v9+8 + ; check: v5 -> v11 + ; check: v12 = load.i64 notrap aligned v9+16 + ; check: v6 -> v12 + ; check: v13 = load.i64 notrap aligned v9+24 + ; check: v7 -> v13 + return +}