diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index 1ddc445927..da5b330e6b 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -216,6 +216,25 @@ pub(crate) fn define() -> SettingGroup { 0, ); + settings.add_bool( + "enable_llvm_abi_extensions", + r#" + Enable various ABI extensions defined by LLVM's behavior. + + In some cases, LLVM's implementation of an ABI (calling convention) + goes beyond a standard and supports additional argument types or + behavior. This option instructs Cranelift codegen to follow LLVM's + behavior where applicable. + + Currently, this applies only to Windows Fastcall on x86-64, and + allows an `i128` argument to be spread across two 64-bit integer + registers. The Fastcall implementation otherwise does not support + `i128` arguments, and will panic if they are present and this + option is not set. + "#, + false, + ); + // BaldrMonkey requires that not-yet-relocated function addresses be encoded // as all-ones bitpatterns. settings.add_bool( diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 8b371cb159..c236d707d5 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -78,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt match ¶m.purpose { &ir::ArgumentPurpose::VMContext => { // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg { - regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()), - ty: ir::types::I64, - extension: param.extension, - purpose: param.purpose, - }) + Some(ABIArg::reg( + xreg(BALDRDASH_TLS_REG).to_real_reg(), + ir::types::I64, + param.extension, + param.purpose, + )) } &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg { - regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()), - ty: ir::types::I64, - extension: param.extension, - purpose: param.purpose, - }) + Some(ABIArg::reg( + xreg(BALDRDASH_SIG_REG).to_real_reg(), + ir::types::I64, + param.extension, + param.purpose, + )) } &ir::ArgumentPurpose::CalleeTLS => { // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack { - offset: BALDRDASH_CALLEE_TLS_OFFSET, - ty: ir::types::I64, - extension: ir::ArgumentExtension::None, - purpose: param.purpose, - }) + Some(ABIArg::stack( + BALDRDASH_CALLEE_TLS_OFFSET, + ir::types::I64, + ir::ArgumentExtension::None, + param.purpose, + )) } &ir::ArgumentPurpose::CallerTLS => { // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020. 
assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack { - offset: BALDRDASH_CALLER_TLS_OFFSET, - ty: ir::types::I64, - extension: ir::ArgumentExtension::None, - purpose: param.purpose, - }) + Some(ABIArg::stack( + BALDRDASH_CALLER_TLS_OFFSET, + ir::types::I64, + ir::ArgumentExtension::None, + param.purpose, + )) } _ => None, } @@ -161,6 +161,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn compute_arg_locs( call_conv: isa::CallConv, + _flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, @@ -253,12 +254,12 @@ impl ABIMachineSpec for AArch64MachineDeps { RegClass::V128 => vreg(*next_reg), _ => unreachable!(), }; - ret.push(ABIArg::Reg { - regs: ValueRegs::one(reg.to_real_reg()), - ty: param.value_type, - extension: param.extension, - purpose: param.purpose, - }); + ret.push(ABIArg::reg( + reg.to_real_reg(), + param.value_type, + param.extension, + param.purpose, + )); *next_reg += 1; remaining_reg_vals -= 1; } else { @@ -268,13 +269,13 @@ impl ABIMachineSpec for AArch64MachineDeps { let size = std::cmp::max(size, 8); // Align. debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack { - offset: next_stack as i64, - ty: param.value_type, - extension: param.extension, - purpose: param.purpose, - }); + next_stack = align_to(next_stack, size); + ret.push(ABIArg::stack( + next_stack as i64, + param.value_type, + param.extension, + param.purpose, + )); next_stack += size; } } @@ -286,19 +287,19 @@ impl ABIMachineSpec for AArch64MachineDeps { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 { - ret.push(ABIArg::Reg { - regs: ValueRegs::one(xreg(next_xreg).to_real_reg()), - ty: I64, - extension: ir::ArgumentExtension::None, - purpose: ir::ArgumentPurpose::Normal, - }); + ret.push(ABIArg::reg( + xreg(next_xreg).to_real_reg(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); } else { - ret.push(ABIArg::Stack { - offset: next_stack as i64, - ty: I64, - extension: ir::ArgumentExtension::None, - purpose: ir::ArgumentPurpose::Normal, - }); + ret.push(ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); next_stack += 8; } Some(ret.len() - 1) @@ -306,7 +307,7 @@ impl ABIMachineSpec for AArch64MachineDeps { None }; - next_stack = (next_stack + 15) & !15; + next_stack = align_to(next_stack, 16); // To avoid overflow issues, limit the arg/return size to something // reasonable -- here, 128 MB. diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index e1a64aeb76..c446c9e576 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -51,6 +51,7 @@ impl ABIMachineSpec for Arm32MachineDeps { fn compute_arg_locs( _call_conv: isa::CallConv, + _flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, @@ -81,12 +82,12 @@ impl ABIMachineSpec for Arm32MachineDeps { if next_rreg < max_reg_val { let reg = rreg(next_rreg); - ret.push(ABIArg::Reg { - regs: ValueRegs::one(reg.to_real_reg()), - ty: param.value_type, - extension: param.extension, - purpose: param.purpose, - }); + ret.push(ABIArg::reg( + reg.to_real_reg(), + param.value_type, + param.extension, + param.purpose, + )); next_rreg += 1; } else { // Arguments are stored on stack in reversed order. 
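The `ABIArg::reg(..)` and `ABIArg::stack(..)` calls used throughout these hunks are thin constructors over the multi-slot representation introduced in `machinst/abi_impl.rs` further down in this patch. Roughly, for a single-register argument (a sketch of the equivalence, not itself part of the patch):

    // Old representation, one location per argument:
    ABIArg::Reg { regs: ValueRegs::one(r), ty, extension, purpose }
    // New representation, as built by ABIArg::reg(r, ty, extension, purpose):
    ABIArg::Slots { slots: vec![ABIArgSlot::Reg { reg: r, ty, extension }], purpose }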
@@ -101,12 +102,12 @@ impl ABIMachineSpec for Arm32MachineDeps { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_rreg < max_reg_val { - ret.push(ABIArg::Reg { - regs: ValueRegs::one(rreg(next_rreg).to_real_reg()), - ty: I32, - extension: ir::ArgumentExtension::None, - purpose: ir::ArgumentPurpose::Normal, - }); + ret.push(ABIArg::reg( + rreg(next_rreg).to_real_reg(), + I32, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); } else { stack_args.push(( I32, @@ -124,12 +125,12 @@ impl ABIMachineSpec for Arm32MachineDeps { let max_stack = next_stack; for (ty, ext, purpose) in stack_args.into_iter().rev() { next_stack -= 4; - ret.push(ABIArg::Stack { - offset: (max_stack - next_stack) as i64, + ret.push(ABIArg::stack( + (max_stack - next_stack) as i64, ty, - extension: ext, + ext, purpose, - }); + )); } assert_eq!(next_stack, 0); diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index cf106ea4e5..165e1980b8 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg { - regs: ValueRegs::one(regs::r14().to_real_reg()), - ty: types::I64, - extension: param.extension, - purpose: param.purpose, - }) + Some(ABIArg::reg( + regs::r14().to_real_reg(), + types::I64, + param.extension, + param.purpose, + )) } &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg { - regs: ValueRegs::one(regs::r10().to_real_reg()), - ty: types::I64, - extension: param.extension, - purpose: param.purpose, - }) + Some(ABIArg::reg( + regs::r10().to_real_reg(), + types::I64, + param.extension, + param.purpose, + )) } &ir::ArgumentPurpose::CalleeTLS => { // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack { - offset: BALDRDASH_CALLEE_TLS_OFFSET, - ty: ir::types::I64, - extension: ir::ArgumentExtension::None, - purpose: param.purpose, - }) + Some(ABIArg::stack( + BALDRDASH_CALLEE_TLS_OFFSET, + ir::types::I64, + ir::ArgumentExtension::None, + param.purpose, + )) } &ir::ArgumentPurpose::CallerTLS => { // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack { - offset: BALDRDASH_CALLER_TLS_OFFSET, - ty: ir::types::I64, - extension: ir::ArgumentExtension::None, - purpose: param.purpose, - }) + Some(ABIArg::stack( + BALDRDASH_CALLER_TLS_OFFSET, + ir::types::I64, + ir::ArgumentExtension::None, + param.purpose, + )) } _ => None, } @@ -97,18 +97,30 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn compute_arg_locs( call_conv: isa::CallConv, + flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, ) -> CodegenResult<(Vec, i64, Option)> { let is_baldrdash = call_conv.extends_baldrdash(); + let is_fastcall = call_conv.extends_windows_fastcall(); let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020; let mut next_gpr = 0; let mut next_vreg = 0; let mut next_stack: u64 = 0; + let mut next_param_idx = 0; // Fastcall cares about overall param index let mut ret = vec![]; + if args_or_rets == ArgsOrRets::Args && is_fastcall { + // Fastcall always reserves 32 bytes of shadow space corresponding to + // the four initial in-arg parameters. 
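+ // The callee owns this area whether or not all four register arguments are used; it commonly serves as spill ("home") space for the register args.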
+ // + // (See: + // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160) + next_stack = 32; + } + if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls { // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and // caller TLS-register values, respectively. @@ -159,72 +171,92 @@ impl ABIMachineSpec for X64ABIMachineSpec { } // Find regclass(es) of the register(s) used to store a value of this type. - let (rcs, _) = Inst::rc_for_type(param.value_type)?; - let intreg = rcs[0] == RegClass::I64; - let num_regs = rcs.len(); - assert!(num_regs <= 2); - if num_regs == 2 { - assert_eq!(rcs[0], rcs[1]); + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + + // Now assign ABIArgSlots for each register-sized part. + // + // Note that the handling of `i128` values is unique here: + // + // - If `enable_llvm_abi_extensions` is set in the flags, each + // `i128` is split into two `i64`s and assigned exactly as if it + // were two consecutive 64-bit args. This is consistent with LLVM's + // behavior, and is needed for some uses of Cranelift (e.g., the + // rustc backend). + // + // - Otherwise, both SysV and Fastcall specify behavior (use of + // vector register, a register pair, or passing by reference + // depending on the case), but for simplicity, we will just panic if + // an i128 type appears in a signature and the LLVM extensions flag + // is not set. + // + // For examples of how rustc compiles i128 args and return values on + // both SysV and Fastcall platforms, see: + // https://godbolt.org/z/PhG3ob + + if param.value_type.bits() > 64 + && !param.value_type.is_vector() + && !flags.enable_llvm_abi_extensions() + { + panic!( + "i128 args/return values not supported unless LLVM ABI extensions are enabled" + ); } - let mut regs: SmallVec<[RealReg; 2]> = smallvec![]; - for j in 0..num_regs { + let mut slots = vec![]; + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let intreg = *rc == RegClass::I64; let nextreg = if intreg { match args_or_rets { - ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j), + ArgsOrRets::Args => { + get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) + } ArgsOrRets::Rets => { - get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j) + get_intreg_for_retval(&call_conv, next_gpr, next_param_idx) } } } else { match args_or_rets { - ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j), + ArgsOrRets::Args => { + get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx) + } ArgsOrRets::Rets => { - get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j) + get_fltreg_for_retval(&call_conv, next_vreg, next_param_idx) } } }; + next_param_idx += 1; if let Some(reg) = nextreg { - regs.push(reg.to_real_reg()); + if intreg { + next_gpr += 1; + } else { + next_vreg += 1; + } + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg(), + ty: *reg_ty, + extension: param.extension, + }); } else { - regs.clear(); - break; + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (reg_ty.bits() / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. 
+ debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; } } - if regs.len() > 0 { - let regs = match num_regs { - 1 => ValueRegs::one(regs[0]), - 2 => ValueRegs::two(regs[0], regs[1]), - _ => panic!("More than two registers unexpected"), - }; - ret.push(ABIArg::Reg { - regs, - ty: param.value_type, - extension: param.extension, - purpose: param.purpose, - }); - if intreg { - next_gpr += num_regs; - } else { - next_vreg += num_regs; - } - } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) - let size = (param.value_type.bits() / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack { - offset: next_stack as i64, - ty: param.value_type, - extension: param.extension, - purpose: param.purpose, - }); - next_stack += size; - } + ret.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); } if args_or_rets == ArgsOrRets::Rets && is_baldrdash { @@ -233,20 +265,20 @@ impl ABIMachineSpec for X64ABIMachineSpec { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); - if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { - ret.push(ABIArg::Reg { - regs: ValueRegs::one(reg.to_real_reg()), - ty: types::I64, - extension: ir::ArgumentExtension::None, - purpose: ir::ArgumentPurpose::Normal, - }); + if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) { + ret.push(ABIArg::reg( + reg.to_real_reg(), + types::I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); } else { - ret.push(ABIArg::Stack { - offset: next_stack as i64, - ty: types::I64, - extension: ir::ArgumentExtension::None, - purpose: ir::ArgumentPurpose::Normal, - }); + ret.push(ABIArg::stack( + next_stack as i64, + types::I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); next_stack += 8; } Some(ret.len() - 1) @@ -254,7 +286,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { None }; - next_stack = (next_stack + 15) & !15; + next_stack = align_to(next_stack, 16); // To avoid overflow issues, limit the arg/return size to something reasonable. if next_stack > STACK_ARG_RET_SIZE_LIMIT { @@ -452,10 +484,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { // registers (all XMM registers are caller-save) so we can compute the // total size of the needed stack space easily. let clobbered = get_callee_saves(&call_conv, clobbers); - let clobbered_size = 8 * clobbered.len() as u32; - let stack_size = clobbered_size + fixed_frame_storage_size; + let stack_size = compute_clobber_size(&clobbered) + fixed_frame_storage_size; // Align to 16 bytes. - let stack_size = (stack_size + 15) & !15; + let stack_size = align_to(stack_size, 16); let clobbered_size = stack_size - fixed_frame_storage_size; // Adjust the stack pointer downward with one `sub rsp, IMM` // instruction. @@ -473,16 +504,23 @@ impl ABIMachineSpec for X64ABIMachineSpec { let r_reg = reg.to_reg(); match r_reg.get_class() { RegClass::I64 => { - insts.push(Inst::mov_r_m( - OperandSize::Size64, + insts.push(Inst::store( + types::I64, r_reg.to_reg(), Amode::imm_reg(cur_offset, regs::rsp()), )); cur_offset += 8; } - // No XMM regs are callee-save, so we do not need to implement - // this. 
- _ => unimplemented!(), + RegClass::V128 => { + cur_offset = align_to(cur_offset, 16); + insts.push(Inst::store( + types::I8X16, + r_reg.to_reg(), + Amode::imm_reg(cur_offset, regs::rsp()), + )); + cur_offset += 16; + } + _ => unreachable!(), } } @@ -499,8 +537,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut insts = SmallVec::new(); let clobbered = get_callee_saves(&call_conv, clobbers); - let stack_size = 8 * clobbered.len() as u32; - let stack_size = (stack_size + 15) & !15; + let stack_size = compute_clobber_size(&clobbered); + let stack_size = align_to(stack_size, 16); // Restore regs by loading from offsets of RSP. let mut cur_offset = 0; @@ -514,7 +552,17 @@ impl ABIMachineSpec for X64ABIMachineSpec { )); cur_offset += 8; } - _ => unimplemented!(), + RegClass::V128 => { + cur_offset = align_to(cur_offset, 16); + insts.push(Inst::load( + types::I8X16, + Amode::imm_reg(cur_offset, regs::rsp()), + Writable::from_reg(rreg.to_reg()), + ExtKind::None, + )); + cur_offset += 16; + } + _ => unreachable!(), } } // Adjust RSP back upward. @@ -592,14 +640,14 @@ impl ABIMachineSpec for X64ABIMachineSpec { // Baldrdash should not use struct args. assert!(!call_conv.extends_baldrdash()); let mut insts = SmallVec::new(); - let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap(); - let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap(); - let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap(); + let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap(); + let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap(); + let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap(); // We need a register to load the address of `memcpy()` below and we // don't have a lowering context to allocate a temp here; so just use a // register we know we are free to mutate as part of this sequence // (because it is clobbered by the call as per the ABI anyway). - let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap(); + let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap(); insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64)); insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64)); insts.extend( @@ -648,10 +696,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> { let mut caller_saved = vec![ - // Systemv calling convention: - // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). - Writable::from_reg(regs::rsi()), - Writable::from_reg(regs::rdi()), + // Intersection of the SysV and Fastcall calling conventions: + // - GPR: all except RDI, RSI, RBX, RBP, R12 to R15. + // SysV additionally clobbers RDI and RSI (Fastcall makes these callee-saved). Writable::from_reg(regs::rax()), Writable::from_reg(regs::rcx()), Writable::from_reg(regs::rdx()), Writable::from_reg(regs::r8()), Writable::from_reg(regs::r9()), Writable::from_reg(regs::r10()), Writable::from_reg(regs::r11()), - // - XMM: all the registers! + // - XMM: XMM0-XMM5. SysV additionally clobbers the rest (XMM6-XMM15).
Writable::from_reg(regs::xmm0()), Writable::from_reg(regs::xmm1()), Writable::from_reg(regs::xmm2()), Writable::from_reg(regs::xmm3()), Writable::from_reg(regs::xmm4()), Writable::from_reg(regs::xmm5()), - Writable::from_reg(regs::xmm6()), - Writable::from_reg(regs::xmm7()), - Writable::from_reg(regs::xmm8()), - Writable::from_reg(regs::xmm9()), - Writable::from_reg(regs::xmm10()), - Writable::from_reg(regs::xmm11()), - Writable::from_reg(regs::xmm12()), - Writable::from_reg(regs::xmm13()), - Writable::from_reg(regs::xmm14()), - Writable::from_reg(regs::xmm15()), ]; + if !call_conv_of_callee.extends_windows_fastcall() { + caller_saved.push(Writable::from_reg(regs::rsi())); + caller_saved.push(Writable::from_reg(regs::rdi())); + caller_saved.push(Writable::from_reg(regs::xmm6())); + caller_saved.push(Writable::from_reg(regs::xmm7())); + caller_saved.push(Writable::from_reg(regs::xmm8())); + caller_saved.push(Writable::from_reg(regs::xmm9())); + caller_saved.push(Writable::from_reg(regs::xmm10())); + caller_saved.push(Writable::from_reg(regs::xmm11())); + caller_saved.push(Writable::from_reg(regs::xmm12())); + caller_saved.push(Writable::from_reg(regs::xmm13())); + caller_saved.push(Writable::from_reg(regs::xmm14())); + caller_saved.push(Writable::from_reg(regs::xmm15())); + } + if call_conv_of_callee.extends_baldrdash() { caller_saved.push(Writable::from_reg(regs::r12())); caller_saved.push(Writable::from_reg(regs::r13())); @@ -739,49 +791,67 @@ impl From for SyntheticAmode { } } -fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option { - match call_conv { +fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option { + let is_fastcall = match call_conv { CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV - | CallConv::Baldrdash2020 => {} - _ => panic!("int args only supported for SysV calling convention"), + | CallConv::Baldrdash2020 => false, + CallConv::WindowsFastcall => true, + _ => panic!("int args only supported for SysV or Fastcall calling convention"), }; - match idx { - 0 => Some(regs::rdi()), - 1 => Some(regs::rsi()), - 2 => Some(regs::rdx()), - 3 => Some(regs::rcx()), - 4 => Some(regs::r8()), - 5 => Some(regs::r9()), + + // Fastcall counts by absolute argument number; SysV counts by argument of + // this (integer) class. 
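+ // For example, for a signature (f64, i64): SysV assigns xmm0 then rdi (separate per-class counters), while Fastcall assigns xmm0 then rdx (absolute arg slots 0 and 1).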
+ let i = if is_fastcall { arg_idx } else { idx }; + match (i, is_fastcall) { + (0, false) => Some(regs::rdi()), + (1, false) => Some(regs::rsi()), + (2, false) => Some(regs::rdx()), + (3, false) => Some(regs::rcx()), + (4, false) => Some(regs::r8()), + (5, false) => Some(regs::r9()), + (0, true) => Some(regs::rcx()), + (1, true) => Some(regs::rdx()), + (2, true) => Some(regs::r8()), + (3, true) => Some(regs::r9()), _ => None, } } -fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option { - match call_conv { +fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option { + let is_fastcall = match call_conv { CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::BaldrdashSystemV - | CallConv::Baldrdash2020 => {} - _ => panic!("float args only supported for SysV calling convention"), + | CallConv::Baldrdash2020 => false, + CallConv::WindowsFastcall => true, + _ => panic!("float args only supported for SysV or Fastcall calling convention"), }; - match idx { - 0 => Some(regs::xmm0()), - 1 => Some(regs::xmm1()), - 2 => Some(regs::xmm2()), - 3 => Some(regs::xmm3()), - 4 => Some(regs::xmm4()), - 5 => Some(regs::xmm5()), - 6 => Some(regs::xmm6()), - 7 => Some(regs::xmm7()), + + // Fastcall counts by absolute argument number; SysV counts by argument of + // this (floating-point) class. + let i = if is_fastcall { arg_idx } else { idx }; + match (i, is_fastcall) { + (0, false) => Some(regs::xmm0()), + (1, false) => Some(regs::xmm1()), + (2, false) => Some(regs::xmm2()), + (3, false) => Some(regs::xmm3()), + (4, false) => Some(regs::xmm4()), + (5, false) => Some(regs::xmm5()), + (6, false) => Some(regs::xmm6()), + (7, false) => Some(regs::xmm7()), + (0, true) => Some(regs::xmm0()), + (1, true) => Some(regs::xmm1()), + (2, true) => Some(regs::xmm2()), + (3, true) => Some(regs::xmm3()), _ => None, } } -fn get_intreg_for_retval_systemv( +fn get_intreg_for_retval( call_conv: &CallConv, intreg_idx: usize, retval_idx: usize, @@ -799,11 +869,16 @@ fn get_intreg_for_retval_systemv( None } } - CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), + CallConv::WindowsFastcall => match intreg_idx { + 0 => Some(regs::rax()), + 1 => Some(regs::rdx()), // The Rust ABI for i128s needs this. 
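+ // (Fastcall proper returns only in RAX here; handing back a second value in RDX follows the LLVM i128 extension discussed in compute_arg_locs above.)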
+ _ => None, + }, + CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), } } -fn get_fltreg_for_retval_systemv( +fn get_fltreg_for_retval( call_conv: &CallConv, fltreg_idx: usize, retval_idx: usize, @@ -821,7 +896,11 @@ fn get_fltreg_for_retval_systemv( None } } - CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), + CallConv::WindowsFastcall => match fltreg_idx { + 0 => Some(regs::xmm0()), + _ => None, + }, + CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), } } @@ -854,6 +933,21 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { } } +fn is_callee_save_fastcall(r: RealReg) -> bool { + use regs::*; + match r.get_class() { + RegClass::I64 => match r.get_hw_encoding() as u8 { + ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, + _ => false, + }, + RegClass::V128 => match r.get_hw_encoding() as u8 { + 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true, + _ => false, + }, + _ => panic!("Unknown register class: {:?}", r.get_class()), + } +} + fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec> { let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs @@ -869,7 +963,11 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec< .cloned() .filter(|r| is_callee_save_systemv(r.to_reg())) .collect(), - CallConv::WindowsFastcall => todo!("windows fastcall"), + CallConv::WindowsFastcall => regs + .iter() + .cloned() + .filter(|r| is_callee_save_fastcall(r.to_reg())) + .collect(), CallConv::Probestack => todo!("probestack?"), }; // Sort registers for deterministic code output. We can do an unstable sort because the @@ -877,3 +975,20 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec< regs.sort_unstable_by_key(|r| r.to_reg().get_index()); regs } + +fn compute_clobber_size(clobbers: &Vec>) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().get_class() { + RegClass::I64 => { + clobbered_size += 8; + } + RegClass::V128 => { + clobbered_size = align_to(clobbered_size, 16); + clobbered_size += 16; + } + _ => unreachable!(), + } + } + clobbered_size +} diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 930839459b..b89d25cb76 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -456,6 +456,7 @@ pub(crate) enum InstructionSet { Popcnt, Lzcnt, BMI1, + #[allow(dead_code)] // never constructed (yet). BMI2, } diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index 1d9e30bd3a..ddcf3adc2d 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -23,11 +23,20 @@ use regalloc::{ }; use std::string::String; -// Hardware encodings for a few registers. +// Hardware encodings (note the special rax, rcx, rdx, rbx order). 
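+// These are the 4-bit register numbers used in x86-64 instruction encodings; is_callee_save_fastcall() in abi.rs matches on them when classifying clobbered registers.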
+pub const ENC_RAX: u8 = 0; +pub const ENC_RCX: u8 = 1; +pub const ENC_RDX: u8 = 2; pub const ENC_RBX: u8 = 3; pub const ENC_RSP: u8 = 4; pub const ENC_RBP: u8 = 5; +pub const ENC_RSI: u8 = 6; +pub const ENC_RDI: u8 = 7; +pub const ENC_R8: u8 = 8; +pub const ENC_R9: u8 = 9; +pub const ENC_R10: u8 = 10; +pub const ENC_R11: u8 = 11; pub const ENC_R12: u8 = 12; pub const ENC_R13: u8 = 13; pub const ENC_R14: u8 = 14; @@ -38,31 +47,31 @@ fn gpr(enc: u8, index: u8) -> Reg { } pub(crate) fn rsi() -> Reg { - gpr(6, 16) + gpr(ENC_RSI, 16) } pub(crate) fn rdi() -> Reg { - gpr(7, 17) + gpr(ENC_RDI, 17) } pub(crate) fn rax() -> Reg { - gpr(0, 18) + gpr(ENC_RAX, 18) } pub(crate) fn rcx() -> Reg { - gpr(1, 19) + gpr(ENC_RCX, 19) } pub(crate) fn rdx() -> Reg { - gpr(2, 20) + gpr(ENC_RDX, 20) } pub(crate) fn r8() -> Reg { - gpr(8, 21) + gpr(ENC_R8, 21) } pub(crate) fn r9() -> Reg { - gpr(9, 22) + gpr(ENC_R9, 22) } pub(crate) fn r10() -> Reg { - gpr(10, 23) + gpr(ENC_R10, 23) } pub(crate) fn r11() -> Reg { - gpr(11, 24) + gpr(ENC_R11, 24) } pub(crate) fn r12() -> Reg { gpr(ENC_R12, 25) diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index a0d4634078..e11adbb01b 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -124,19 +124,18 @@ use std::convert::TryFrom; use std::marker::PhantomData; use std::mem; -/// A location for an argument or return value. -#[derive(Clone, Copy, Debug)] -pub enum ABIArg { - /// In a real register (or set of registers). +/// A location for (part of) an argument or return value. These "storage slots" +/// are specified for each register-sized part of an argument. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ABIArgSlot { + /// In a real register. Reg { - /// Register(s) that hold this arg. - regs: ValueRegs, + /// Register that holds this arg. + reg: RealReg, /// Value type of this arg. ty: ir::Type, /// Should this arg be zero- or sign-extended? extension: ir::ArgumentExtension, - /// Purpose of this arg. - purpose: ir::ArgumentPurpose, }, /// Arguments only: on stack, at given offset from SP at entry. Stack { @@ -146,6 +145,26 @@ pub enum ABIArg { ty: ir::Type, /// Should this arg be zero- or sign-extended? extension: ir::ArgumentExtension, + }, +} + +/// An ABIArg is composed of one or more parts. This allows for a CLIF-level +/// Value to be passed with its parts in more than one location at the ABI +/// level. For example, a 128-bit integer may be passed in two 64-bit registers, +/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The +/// number of "parts" should correspond to the number of registers used to store +/// this type according to the machine backend. +/// +/// As an invariant, the `purpose` for every part must match. As a further +/// invariant, a `StructArg` part cannot appear with any other part. +#[derive(Clone, Debug)] +pub enum ABIArg { + /// Storage slots (registers or stack locations) for each part of the + /// argument value. The number of slots must equal the number of register + /// parts used to store a value of this type. + Slots { + /// Slots, one per register part. + slots: Vec, /// Purpose of this arg. purpose: ir::ArgumentPurpose, }, @@ -167,21 +186,50 @@ pub enum ABIArg { impl ABIArg { /// Get the purpose of this arg. - fn get_purpose(self) -> ir::ArgumentPurpose { + fn get_purpose(&self) -> ir::ArgumentPurpose { match self { - ABIArg::Reg { purpose, .. } => purpose, - ABIArg::Stack { purpose, .. 
} => purpose, - ABIArg::StructArg { purpose, .. } => purpose, + &ABIArg::Slots { purpose, .. } => purpose, + &ABIArg::StructArg { purpose, .. } => purpose, } } /// Is this a StructArg? - fn is_struct_arg(self) -> bool { + fn is_struct_arg(&self) -> bool { match self { - ABIArg::StructArg { .. } => true, + &ABIArg::StructArg { .. } => true, _ => false, } } + + /// Create an ABIArg from one register. + pub fn reg( + reg: RealReg, + ty: ir::Type, + extension: ir::ArgumentExtension, + purpose: ir::ArgumentPurpose, + ) -> ABIArg { + ABIArg::Slots { + slots: vec![ABIArgSlot::Reg { reg, ty, extension }], + purpose, + } + } + + /// Create an ABIArg from one stack slot. + pub fn stack( + offset: i64, + ty: ir::Type, + extension: ir::ArgumentExtension, + purpose: ir::ArgumentPurpose, + ) -> ABIArg { + ABIArg::Slots { + slots: vec![ABIArgSlot::Stack { + offset, + ty, + extension, + }], + purpose, + } + } } /// Are we computing information about arguments or return values? Much of the @@ -275,6 +323,7 @@ pub trait ABIMachineSpec { /// index of the extra synthetic arg that was added. fn compute_arg_locs( call_conv: isa::CallConv, + flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, @@ -461,11 +510,15 @@ struct ABISig { } impl ABISig { - fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> { + fn from_func_sig<M: ABIMachineSpec>( + sig: &ir::Signature, + flags: &settings::Flags, + ) -> CodegenResult<ABISig> { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. let (rets, stack_ret_space, _) = M::compute_arg_locs( sig.call_conv, + flags, &sig.returns, ArgsOrRets::Rets, /* extra ret-area ptr = */ false, )?; let need_stack_return_area = stack_ret_space > 0; let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs( sig.call_conv, + flags, &sig.params, ArgsOrRets::Args, need_stack_return_area, @@ -557,8 +611,11 @@ fn get_special_purpose_param_register( purpose: ir::ArgumentPurpose, ) -> Option<Reg> { let idx = f.signature.special_param_index(purpose)?; - match abi.args[idx] { - ABIArg::Reg { regs, .. } => Some(regs.only_reg().unwrap().to_reg()), + match &abi.args[idx] { + &ABIArg::Slots { ref slots, .. } => match &slots[0] { + &ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()), + _ => None, + }, _ => None, } } @@ -569,7 +626,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> { debug!("ABI: func signature {:?}", f.signature); let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); - let sig = ABISig::from_func_sig::<M>(&ir_sig)?; + let sig = ABISig::from_func_sig::<M>(&ir_sig, &flags)?; let call_conv = f.signature.call_conv; // Only these calling conventions are supported.
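With the slot-based representation in place, `compute_arg_locs` can describe an `i128` split across two registers as a single `ABIArg` with two slots. For SysV, for example, the first `i128` argument of a function comes out as the following (illustrative only, with `rdi`/`rsi` standing in for the corresponding `RealReg` values; not itself part of the patch):

    ABIArg::Slots {
        slots: vec![
            ABIArgSlot::Reg { reg: rdi, ty: types::I64, extension: ir::ArgumentExtension::None },
            ABIArgSlot::Reg { reg: rsi, ty: types::I64, extension: ir::ArgumentExtension::None },
        ],
        purpose: ir::ArgumentPurpose::Normal,
    }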
@@ -577,7 +634,8 @@ impl ABICalleeImpl { call_conv == isa::CallConv::SystemV || call_conv == isa::CallConv::Fast || call_conv == isa::CallConv::Cold - || call_conv.extends_baldrdash(), + || call_conv.extends_baldrdash() + || call_conv.extends_windows_fastcall(), "Unsupported calling convention: {:?}", call_conv ); @@ -776,19 +834,6 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> } } -fn gen_move_multi( - dst: ValueRegs>, - src: ValueRegs, - ty: Type, -) -> SmallInstVec { - let mut ret = smallvec![]; - let (_, tys) = M::I::rc_for_type(ty).unwrap(); - for ((&dst, &src), &ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) { - ret.push(M::gen_move(dst, src, ty)); - } - ret -} - fn gen_load_stack_multi( from: StackAMode, dst: ValueRegs>, @@ -821,22 +866,6 @@ fn gen_store_stack_multi( ret } -fn gen_store_base_offset_multi( - base: Reg, - mut offset: i32, - src: ValueRegs, - ty: Type, -) -> SmallInstVec { - let mut ret = smallvec![]; - let (_, tys) = M::I::rc_for_type(ty).unwrap(); - // N.B.: registers are given in the `ValueRegs` in target endian order. - for (&src, &ty) in src.regs().iter().zip(tys.iter()) { - ret.push(M::gen_store_base_offset(base, offset, src, ty)); - offset += ty.bytes() as i32; - } - ret -} - fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature { let params_structret = sig .params @@ -892,10 +921,12 @@ impl ABICallee for ABICalleeImpl { fn liveins(&self) -> Set { let mut set: Set = Set::empty(); - for &arg in &self.sig.args { - if let ABIArg::Reg { regs, .. } = arg { - for &r in regs.regs() { - set.insert(r); + for arg in &self.sig.args { + if let &ABIArg::Slots { ref slots, .. } = arg { + for slot in slots { + if let ABIArgSlot::Reg { reg, .. } = slot { + set.insert(*reg); + } } } } @@ -904,10 +935,12 @@ impl ABICallee for ABICalleeImpl { fn liveouts(&self) -> Set { let mut set: Set = Set::empty(); - for &ret in &self.sig.rets { - if let ABIArg::Reg { regs, .. } = ret { - for &r in regs.regs() { - set.insert(r); + for ret in &self.sig.rets { + if let &ABIArg::Slots { ref slots, .. } = ret { + for slot in slots { + if let ABIArgSlot::Reg { reg, .. } = slot { + set.insert(*reg); + } } } } @@ -935,29 +968,43 @@ impl ABICallee for ABICalleeImpl { idx: usize, into_regs: ValueRegs>, ) -> SmallInstVec { + let mut insts = smallvec![]; match &self.sig.args[idx] { - // Extension mode doesn't matter (we're copying out, not in; we - // ignore high bits by convention). - &ABIArg::Reg { regs, ty, .. } => { - gen_move_multi::(into_regs, regs.map(|r| r.to_reg()), ty) + &ABIArg::Slots { ref slots, .. } => { + assert_eq!(into_regs.len(), slots.len()); + for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) { + match slot { + // Extension mode doesn't matter (we're copying out, not in; we + // ignore high bits by convention). + &ABIArgSlot::Reg { reg, ty, .. } => { + insts.push(M::gen_move(*into_reg, reg.to_reg(), ty)); + } + &ABIArgSlot::Stack { offset, ty, .. } => { + insts.push(M::gen_load_stack( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + ty, + ), + *into_reg, + ty, + )); + } + } + } } - &ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, - ty, - ), - into_regs, - ty, - ), - &ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + &ABIArg::StructArg { offset, .. 
} => { + let into_reg = into_regs.only_reg().unwrap(); + insts.push(M::gen_get_stack_addr( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + I8, + ), + into_reg, I8, - ), - into_regs.only_reg().unwrap(), - I8, - )], + )); + } } + insts } fn arg_is_needed_in_body(&self, idx: usize) -> bool { @@ -978,87 +1025,84 @@ impl ABICallee for ABICalleeImpl { let mut ret = smallvec![]; let word_bits = M::word_bits() as u8; match &self.sig.rets[idx] { - &ABIArg::Reg { - regs, - ty, - extension, - .. - } => { - let from_bits = ty_bits(ty) as u8; - let dest_regs = writable_value_regs(regs.map(|r| r.to_reg())); - let ext = M::get_ext_mode(self.sig.call_conv, extension); - match (ext, from_bits) { - (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) - if n < word_bits => - { - let signed = ext == ArgumentExtension::Sext; - let dest_reg = dest_regs - .only_reg() - .expect("extension only possible from one-reg value"); - let from_reg = from_regs - .only_reg() - .expect("extension only possible from one-reg value"); - ret.push(M::gen_extend( - dest_reg, - from_reg.to_reg(), - signed, - from_bits, - /* to_bits = */ word_bits, - )); + &ABIArg::Slots { ref slots, .. } => { + assert_eq!(from_regs.len(), slots.len()); + for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) { + match slot { + &ABIArgSlot::Reg { + reg, ty, extension, .. + } => { + let from_bits = ty_bits(ty) as u8; + let ext = M::get_ext_mode(self.sig.call_conv, extension); + match (ext, from_bits) { + (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) + if n < word_bits => + { + let signed = ext == ArgumentExtension::Sext; + ret.push(M::gen_extend( + Writable::from_reg(reg.to_reg()), + from_reg.to_reg(), + signed, + from_bits, + /* to_bits = */ word_bits, + )); + } + _ => { + ret.push(M::gen_move( + Writable::from_reg(reg.to_reg()), + from_reg.to_reg(), + ty, + )); + } + }; + } + &ABIArgSlot::Stack { + offset, + ty, + extension, + .. + } => { + let mut ty = ty; + let from_bits = ty_bits(ty) as u8; + // A machine ABI implementation should ensure that stack frames + // have "reasonable" size. All current ABIs for machinst + // backends (aarch64 and x64) enforce a 128MB limit. + let off = i32::try_from(offset).expect( + "Argument stack offset greater than 2GB; should hit impl limit first", + ); + let ext = M::get_ext_mode(self.sig.call_conv, extension); + // Trash the from_reg; it should be its last use. + match (ext, from_bits) { + (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) + if n < word_bits => + { + assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class()); + let signed = ext == ArgumentExtension::Sext; + ret.push(M::gen_extend( + Writable::from_reg(from_reg.to_reg()), + from_reg.to_reg(), + signed, + from_bits, + /* to_bits = */ word_bits, + )); + // Store the extended version. + ty = M::word_type(); + } + _ => {} + }; + ret.push(M::gen_store_base_offset( + self.ret_area_ptr.unwrap().to_reg(), + off, + from_reg.to_reg(), + ty, + )); + } } - _ => ret.extend( - gen_move_multi::(dest_regs, non_writable_value_regs(from_regs), ty) - .into_iter(), - ), - }; + } } - &ABIArg::Stack { - offset, - ty, - extension, - .. - } => { - let mut ty = ty; - let from_bits = ty_bits(ty) as u8; - // A machine ABI implementation should ensure that stack frames - // have "reasonable" size. All current ABIs for machinst - // backends (aarch64 and x64) enforce a 128MB limit. 
- let off = i32::try_from(offset) - .expect("Argument stack offset greater than 2GB; should hit impl limit first"); - let ext = M::get_ext_mode(self.sig.call_conv, extension); - // Trash the from_reg; it should be its last use. - match (ext, from_bits) { - (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) - if n < word_bits => - { - let from_reg = from_regs - .only_reg() - .expect("extension only possible from one-reg value"); - assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class()); - let signed = ext == ArgumentExtension::Sext; - ret.push(M::gen_extend( - from_reg, - from_reg.to_reg(), - signed, - from_bits, - /* to_bits = */ word_bits, - )); - // Store the extended version. - ty = M::word_type(); - } - _ => {} - }; - ret.extend( - gen_store_base_offset_multi::( - self.ret_area_ptr.unwrap().to_reg(), - off, - non_writable_value_regs(from_regs), - ty, - ) - .into_iter(), - ); + &ABIArg::StructArg { .. } => { + panic!("StructArg in return position is unsupported"); } - &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"), } ret } @@ -1345,20 +1389,30 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec uses.extend(regs.regs().iter().map(|r| r.to_reg())), - _ => {} + if let &ABIArg::Slots { ref slots, .. } = arg { + for slot in slots { + match slot { + &ABIArgSlot::Reg { reg, .. } => { + uses.push(reg.to_reg()); + } + _ => {} + } + } } } // Compute defs: all retval regs, and all caller-save (clobbered) regs. let mut defs = M::get_regs_clobbered_by_call(sig.call_conv); for ret in &sig.rets { - match ret { - &ABIArg::Reg { regs, .. } => { - defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg()))) + if let &ABIArg::Slots { ref slots, .. } = ret { + for slot in slots { + match slot { + &ABIArgSlot::Reg { reg, .. } => { + defs.push(Writable::from_reg(reg.to_reg())); + } + _ => {} + } } - _ => {} } } @@ -1406,7 +1460,7 @@ impl ABICallerImpl { flags: &settings::Flags, ) -> CodegenResult> { let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig)?; + let sig = ABISig::from_func_sig::(&ir_sig, flags)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); Ok(ABICallerImpl { ir_sig, @@ -1431,7 +1485,7 @@ impl ABICallerImpl { flags: &settings::Flags, ) -> CodegenResult> { let ir_sig = ensure_struct_return_ptr_is_returned(sig); - let sig = ABISig::from_func_sig::(&ir_sig)?; + let sig = ABISig::from_func_sig::(&ir_sig, flags)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); Ok(ABICallerImpl { ir_sig, @@ -1501,75 +1555,73 @@ impl ABICaller for ABICallerImpl { let word_rc = M::word_reg_class(); let word_bits = M::word_bits() as usize; match &self.sig.args[idx] { - &ABIArg::Reg { - regs, - ty, - extension, - .. - } => { - let ext = M::get_ext_mode(self.sig.call_conv, extension); - if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - let reg = regs.only_reg().unwrap(); - assert_eq!(word_rc, reg.get_class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), - }; - ctx.emit(M::gen_extend( - Writable::from_reg(reg.to_reg()), - from_regs.only_reg().unwrap(), - signed, - ty_bits(ty) as u8, - word_bits as u8, - )); - } else { - for insn in gen_move_multi::( - writable_value_regs(regs.map(|r| r.to_reg())), - from_regs, - ty, - ) { - ctx.emit(insn); + &ABIArg::Slots { ref slots, .. 
} => { + assert_eq!(from_regs.len(), slots.len()); + for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) { + match slot { + &ABIArgSlot::Reg { + reg, ty, extension, .. + } => { + let ext = M::get_ext_mode(self.sig.call_conv, extension); + if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { + assert_eq!(word_rc, reg.get_class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + ctx.emit(M::gen_extend( + Writable::from_reg(reg.to_reg()), + *from_reg, + signed, + ty_bits(ty) as u8, + word_bits as u8, + )); + } else { + ctx.emit(M::gen_move( + Writable::from_reg(reg.to_reg()), + *from_reg, + ty, + )); + } + } + &ABIArgSlot::Stack { + offset, + ty, + extension, + .. + } => { + let mut ty = ty; + let ext = M::get_ext_mode(self.sig.call_conv, extension); + if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { + assert_eq!(word_rc, from_reg.get_class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + // Extend in place in the source register. Our convention is to + // treat high bits as undefined for values in registers, so this + // is safe, even for an argument that is nominally read-only. + ctx.emit(M::gen_extend( + Writable::from_reg(*from_reg), + *from_reg, + signed, + ty_bits(ty) as u8, + word_bits as u8, + )); + // Store the extended version. + ty = M::word_type(); + } + ctx.emit(M::gen_store_stack( + StackAMode::SPOffset(offset, ty), + *from_reg, + ty, + )); + } } } } - &ABIArg::Stack { - offset, - ty, - extension, - .. - } => { - let mut ty = ty; - let ext = M::get_ext_mode(self.sig.call_conv, extension); - if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - let from_reg = from_regs - .only_reg() - .expect("only one reg for sub-word value width"); - assert_eq!(word_rc, from_reg.get_class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), - }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. - ctx.emit(M::gen_extend( - Writable::from_reg(from_reg), - from_reg, - signed, - ty_bits(ty) as u8, - word_bits as u8, - )); - // Store the extended version. - ty = M::word_type(); - } - for insn in - gen_store_stack_multi::(StackAMode::SPOffset(offset, ty), from_regs, ty) - { - ctx.emit(insn); - } - } &ABIArg::StructArg { offset, size, .. } => { let src_ptr = from_regs.only_reg().unwrap(); let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); @@ -1618,24 +1670,29 @@ impl ABICaller for ABICallerImpl { into_regs: ValueRegs>, ) { match &self.sig.rets[idx] { - // Extension mode doesn't matter because we're copying out, not in, - // and we ignore high bits in our own registers by convention. - &ABIArg::Reg { regs, ty, .. } => { - for insn in gen_move_multi::(into_regs, regs.map(|r| r.to_reg()), ty) { - ctx.emit(insn); + &ABIArg::Slots { ref slots, .. } => { + assert_eq!(into_regs.len(), slots.len()); + for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) { + match slot { + // Extension mode doesn't matter because we're copying out, not in, + // and we ignore high bits in our own registers by convention. + &ABIArgSlot::Reg { reg, ty, .. 
} => { ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty)); } + &ABIArgSlot::Stack { offset, ty, .. } => { + let ret_area_base = self.sig.stack_arg_space; + ctx.emit(M::gen_load_stack( + StackAMode::SPOffset(offset + ret_area_base, ty), + *into_reg, + ty, + )); + } + } + } } - &ABIArg::Stack { offset, ty, .. } => { - let ret_area_base = self.sig.stack_arg_space; - for insn in gen_load_stack_multi::<M>( - StackAMode::SPOffset(offset + ret_area_base, ty), - into_regs, - ty, - ) { - ctx.emit(insn); - } + &ABIArg::StructArg { .. } => { + panic!("StructArg not supported in return position"); } - &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"), } } diff --git a/cranelift/codegen/src/machinst/helpers.rs b/cranelift/codegen/src/machinst/helpers.rs index b61d9560dc..40139d61ee 100644 --- a/cranelift/codegen/src/machinst/helpers.rs +++ b/cranelift/codegen/src/machinst/helpers.rs @@ -3,6 +3,7 @@ use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs}; use crate::ir::Type; use regalloc::{Reg, Writable}; +use std::ops::{Add, BitAnd, Not, Sub}; /// Returns the size (in bits) of a given type. pub fn ty_bits(ty: Type) -> usize { @@ -26,3 +27,17 @@ pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>( ) -> ValueRegs<Writable<Reg>> { ctx.get_output(spec.insn, spec.output) } + +/// Align a size up to a power-of-two alignment. +pub(crate) fn align_to<N>(x: N, alignment: N) -> N +where + N: Not<Output = N> + + BitAnd<N, Output = N> + + Add<N, Output = N> + + Sub<N, Output = N> + + From<u8> + + Copy, +{ + let alignment_mask = alignment - 1.into(); + (x + alignment_mask) & !alignment_mask +} diff --git a/cranelift/codegen/src/machinst/valueregs.rs b/cranelift/codegen/src/machinst/valueregs.rs index 1f9f0f05dd..8ffcc73ab3 100644 --- a/cranelift/codegen/src/machinst/valueregs.rs +++ b/cranelift/codegen/src/machinst/valueregs.rs @@ -175,11 +175,13 @@ impl ValueRegs { } /// Create a writable ValueRegs. +#[allow(dead_code)] pub(crate) fn writable_value_regs(regs: ValueRegs<Reg>) -> ValueRegs<Writable<Reg>> { regs.map(|r| Writable::from_reg(r)) } /// Strip a writable ValueRegs down to a readonly ValueRegs.
+#[allow(dead_code)] pub(crate) fn non_writable_value_regs(regs: ValueRegs<Writable<Reg>>) -> ValueRegs<Reg> { regs.map(|r| r.to_reg()) } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index a1bc954c54..e91e4e219b 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -398,6 +398,7 @@ use_pinned_reg_as_heap_base = false enable_simd = false enable_atomics = true enable_safepoints = false +enable_llvm_abi_extensions = false emit_all_ones_funcaddrs = false enable_probestack = true probestack_func_adjusts_sp = false diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif new file mode 100644 index 0000000000..b66817f9ac --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -0,0 +1,299 @@ +test compile +set enable_llvm_abi_extensions=true +target x86_64 +feature "experimental_x64" + +function %f0(i64, i64, i64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return v0 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f1(i64, i64, i64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return v1 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f2(i64, i64, i64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %r8, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f3(i64, i64, i64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return v3 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %r9, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f4(i64, i64, f64, i64) -> f64 windows_fastcall { +block0(v0: i64, v1: i64, v2: f64, v3: i64): + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movaps %xmm2, %xmm0 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f5(i64, i64, f64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: f64, v3: i64): + return v3 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %r9, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): + return v5 +} + +;; This is truly odd (because of the regalloc ordering), but it works. Note +;; that we're spilling and using rsi, which is a callee-save in fastcall, because +;; the regalloc order is optimized for SysV. Also note that because we copy args +;; out of their input locations to separate vregs, we have a spurious load +;; from [rbp+48]. Ordinarily these moves are coalesced because the dest vreg +;; is allocated as a caller-save (volatile), but here again we allocate rsi +;; first and so have to spill it (and consequently don't coalesce). +;; +;; TODO(#2704): fix regalloc's register priority ordering!
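+;; (Frame math: at entry, [rsp] holds the return address and [rsp+8 .. rsp+40) is the 32-byte shadow space, so after `pushq %rbp; movq %rsp, %rbp` the fifth and sixth args live at [rbp+48] and [rbp+56], matching the two loads below.)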
+ +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %rsi, 0(%rsp) +; nextln: virtual_sp_offset_adjust 16 +; nextln: movq 48(%rbp), %rsi +; nextln: movq 56(%rbp), %rsi +; nextln: movq %rsi, %rax +; nextln: movq 0(%rsp), %rsi +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f7(i128, i64, i128, i128) -> i128 windows_fastcall { +block0(v0: i128, v1: i64, v2: i128, v3: i128): + return v3 +} + +;; Again, terrible regalloc behavior. The important part is that `v3` comes +;; from [rbp+56] and [rbp+64], i.e., the second and third non-shadow +;; stack slot. + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %rsi, 0(%rsp) +; nextln: movq %rdi, 8(%rsp) +; nextln: virtual_sp_offset_adjust 16 +; nextln: movq 48(%rbp), %rsi +; nextln: movq 56(%rbp), %rsi +; nextln: movq 64(%rbp), %rdi +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rdx +; nextln: movq 0(%rsp), %rsi +; nextln: movq 8(%rsp), %rdi +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f8(i64) -> i64 windows_fastcall { + sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall + fn0 = %g sig0 + +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + v2 = call fn0(v0, v0, v1, v1, v0, v0) + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %rsi, 0(%rsp) +; nextln: virtual_sp_offset_adjust 16 +; nextln: movq %rcx, %rsi +; nextln: cvtsi2sd %rsi, %xmm3 +; nextln: subq $$48, %rsp +; nextln: virtual_sp_offset_adjust 48 +; nextln: movq %rsi, %rcx +; nextln: movq %rsi, %rdx +; nextln: movaps %xmm3, %xmm2 +; nextln: movq %rsi, 32(%rsp) +; nextln: movq %rsi, 40(%rsp) +; nextln: load_ext_name %g+0, %rsi +; nextln: call *%rsi +; nextln: addq $$48, %rsp +; nextln: virtual_sp_offset_adjust -48 +; nextln: movq 0(%rsp), %rsi +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f9(i64) -> f64 windows_fastcall { +block0(v0: i64): + v1 = load.f64 v0+0 + v2 = load.f64 v0+8 + v3 = load.f64 v0+16 + v4 = load.f64 v0+24 + v5 = load.f64 v0+32 + v6 = load.f64 v0+40 + v7 = load.f64 v0+48 + v8 = load.f64 v0+56 + v9 = load.f64 v0+64 + v10 = load.f64 v0+72 + v11 = load.f64 v0+80 + v12 = load.f64 v0+88 + v13 = load.f64 v0+96 + v14 = load.f64 v0+104 + v15 = load.f64 v0+112 + v16 = load.f64 v0+120 + v17 = load.f64 v0+128 + v18 = load.f64 v0+136 + v19 = load.f64 v0+144 + v20 = load.f64 v0+152 + + v21 = fadd.f64 v1, v2 + v22 = fadd.f64 v3, v4 + v23 = fadd.f64 v5, v6 + v24 = fadd.f64 v7, v8 + v25 = fadd.f64 v9, v10 + v26 = fadd.f64 v11, v12 + v27 = fadd.f64 v13, v14 + v28 = fadd.f64 v15, v16 + v29 = fadd.f64 v17, v18 + v30 = fadd.f64 v19, v20 + + v31 = fadd.f64 v21, v22 + v32 = fadd.f64 v23, v24 + v33 = fadd.f64 v25, v26 + v34 = fadd.f64 v27, v28 + v35 = fadd.f64 v29, v30 + + v36 = fadd.f64 v31, v32 + v37 = fadd.f64 v33, v34 + + v38 = fadd.f64 v36, v37 + + v39 = fadd.f64 v38, v35 + + return v39 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$208, %rsp +; nextln: movdqu %xmm6, 0(%rsp) +; nextln: movdqu %xmm7, 16(%rsp) +; nextln: movdqu %xmm8, 32(%rsp) +; nextln: movdqu %xmm9, 48(%rsp) +; nextln: movdqu %xmm10, 64(%rsp) +; nextln: movdqu %xmm11, 80(%rsp) +; nextln: movdqu %xmm12, 96(%rsp) +; nextln: movdqu %xmm13, 112(%rsp) +; nextln: movdqu %xmm14, 128(%rsp) +; nextln: movdqu %xmm15, 144(%rsp) +; nextln: virtual_sp_offset_adjust 160 +; nextln: movsd 0(%rcx), %xmm0 +; 
nextln: movsd %xmm0, rsp(16 + virtual offset) +; nextln: movsd 8(%rcx), %xmm1 +; nextln: movsd 16(%rcx), %xmm0 +; nextln: movsd %xmm0, rsp(24 + virtual offset) +; nextln: movsd 24(%rcx), %xmm3 +; nextln: movsd 32(%rcx), %xmm0 +; nextln: movsd %xmm0, rsp(32 + virtual offset) +; nextln: movsd 40(%rcx), %xmm5 +; nextln: movsd 48(%rcx), %xmm6 +; nextln: movsd 56(%rcx), %xmm7 +; nextln: movsd 64(%rcx), %xmm8 +; nextln: movsd 72(%rcx), %xmm9 +; nextln: movsd 80(%rcx), %xmm10 +; nextln: movsd 88(%rcx), %xmm11 +; nextln: movsd 96(%rcx), %xmm12 +; nextln: movsd 104(%rcx), %xmm13 +; nextln: movsd 112(%rcx), %xmm14 +; nextln: movsd 120(%rcx), %xmm15 +; nextln: movsd 128(%rcx), %xmm0 +; nextln: movsd %xmm0, rsp(0 + virtual offset) +; nextln: movsd 136(%rcx), %xmm0 +; nextln: movsd 144(%rcx), %xmm2 +; nextln: movsd %xmm2, rsp(8 + virtual offset) +; nextln: movsd 152(%rcx), %xmm2 +; nextln: nop len=0 +; nextln: movsd rsp(16 + virtual offset), %xmm4 +; nextln: addsd %xmm1, %xmm4 +; nextln: movsd %xmm4, rsp(16 + virtual offset) +; nextln: movsd rsp(24 + virtual offset), %xmm1 +; nextln: addsd %xmm3, %xmm1 +; nextln: movsd rsp(32 + virtual offset), %xmm4 +; nextln: addsd %xmm5, %xmm4 +; nextln: addsd %xmm7, %xmm6 +; nextln: addsd %xmm9, %xmm8 +; nextln: addsd %xmm11, %xmm10 +; nextln: addsd %xmm13, %xmm12 +; nextln: addsd %xmm15, %xmm14 +; nextln: movsd rsp(0 + virtual offset), %xmm3 +; nextln: addsd %xmm0, %xmm3 +; nextln: movsd rsp(8 + virtual offset), %xmm0 +; nextln: addsd %xmm2, %xmm0 +; nextln: movsd rsp(16 + virtual offset), %xmm2 +; nextln: addsd %xmm1, %xmm2 +; nextln: addsd %xmm6, %xmm4 +; nextln: addsd %xmm10, %xmm8 +; nextln: addsd %xmm14, %xmm12 +; nextln: addsd %xmm0, %xmm3 +; nextln: addsd %xmm4, %xmm2 +; nextln: addsd %xmm12, %xmm8 +; nextln: addsd %xmm8, %xmm2 +; nextln: addsd %xmm3, %xmm2 +; nextln: movaps %xmm2, %xmm0 +; nextln: movdqu 0(%rsp), %xmm6 +; nextln: movdqu 16(%rsp), %xmm7 +; nextln: movdqu 32(%rsp), %xmm8 +; nextln: movdqu 48(%rsp), %xmm9 +; nextln: movdqu 64(%rsp), %xmm10 +; nextln: movdqu 80(%rsp), %xmm11 +; nextln: movdqu 96(%rsp), %xmm12 +; nextln: movdqu 112(%rsp), %xmm13 +; nextln: movdqu 128(%rsp), %xmm14 +; nextln: movdqu 144(%rsp), %xmm15 +; nextln: addq $$160, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 5e5d2ffb86..7c012f7917 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,4 +1,5 @@ test compile +set enable_llvm_abi_extensions=true target x86_64 feature "experimental_x64" @@ -941,17 +942,17 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): v11 = iadd.i128 v9, v10 return v11 -; check: movq %rsp, %rbp +; check: pushq %rbp +; nextln: movq %rsp, %rbp ; nextln: subq $$16, %rsp ; nextln: movq %r12, 0(%rsp) ; nextln: movq %r13, 8(%rsp) ; nextln: virtual_sp_offset_adjust 16 -; nextln: movq 16(%rbp), %r9 -; nextln: movq 24(%rbp), %r10 -; nextln: movq 32(%rbp), %r12 -; nextln: movq 40(%rbp), %r11 -; nextln: movq 48(%rbp), %rax -; nextln: movq 56(%rbp), %r13 +; nextln: movq 16(%rbp), %r10 +; nextln: movq 24(%rbp), %r12 +; nextln: movq 32(%rbp), %r11 +; nextln: movq 40(%rbp), %rax +; nextln: movq 48(%rbp), %r13 ; nextln: addq %rdx, %rdi ; nextln: adcq %rcx, %rsi ; nextln: xorq %rcx, %rcx @@ -989,10 +990,10 @@ block0(v0: i128): ; nextln: movq %r10, 16(%rsi) ; nextln: movq %r11, 24(%rsi) ; nextln: movq %r12, 32(%rsi) -; nextln: movq %r13, 48(%rsi) 
-; nextln: movq %r14, 56(%rsi) -; nextln: movq %rdi, 64(%rsi) -; nextln: movq %rbx, 72(%rsi) +; nextln: movq %r13, 40(%rsi) +; nextln: movq %r14, 48(%rsi) +; nextln: movq %rdi, 56(%rsi) +; nextln: movq %rbx, 64(%rsi) } diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index 3492a71997..97ec0142f5 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -1,4 +1,5 @@ test compile +set enable_llvm_abi_extensions=true target x86_64 feature "experimental_x64"
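For reference, the `set enable_llvm_abi_extensions=true` lines in these tests correspond to the following embedder-side configuration (a sketch using cranelift-codegen's settings builder API):

    use cranelift_codegen::settings::{self, Configurable};

    // Build flags with the LLVM ABI extensions enabled, then hand them to the ISA builder.
    let mut builder = settings::builder();
    builder.enable("enable_llvm_abi_extensions").unwrap();
    let flags = settings::Flags::new(builder);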