diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 2a869185d4..44b848edc5 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1145,12 +1145,11 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, F32) | (RegClass::V128, F64) => 1, - (RegClass::V128, _) => 2, + match rc { + RegClass::I64 => 1, + RegClass::V128 => 2, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index aba6e548d8..4fcf423860 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -436,7 +436,7 @@ impl ABIMachineSpec for Arm32MachineDeps { unimplemented!("StructArgs not implemented for ARM32 yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { match rc { RegClass::I32 => 1, _ => panic!("Unexpected register class!"), diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 8712d0de28..5dd7dd55c6 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -685,11 +685,11 @@ impl ABIMachineSpec for S390xMachineDeps { unimplemented!("StructArgs not implemented for S390X yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::F64, _) => 1, + match rc { + RegClass::I64 => 1, + RegClass::F64 => 1, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 7a4352b38f..9687ac5f26 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -719,12 +719,11 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, - (RegClass::V128, _) => 2, + match rc { + RegClass::I64 => 1, + RegClass::V128 => 2, _ => panic!("Unexpected register class!"), } } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d9e1e3c6ae..d32993d1ce 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -161,22 +161,13 @@ pub trait ABICallee { fn stack_args_size(&self) -> u32; /// Get the spill-slot size. - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; + fn get_spillslot_size(&self, rc: RegClass) -> u32; - /// Generate a spill. The type, if known, is given; this can be used to - /// generate a store instruction optimized for the particular type rather - /// than the RegClass (e.g., only F64 that resides in a V128 register). If - /// no type is given, the implementation should spill the whole register. - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I; + /// Generate a spill. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I; - /// Generate a reload (fill). As for spills, the type may be given to allow - /// a more optimized load instruction to be generated. - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Self::I; + /// Generate a reload (fill). + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot) -> Self::I; } /// Trait implemented by an object that tracks ABI-related state and can diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 0da169b42c..5126752db0 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -502,9 +502,8 @@ pub trait ABIMachineSpec { size: usize, ) -> SmallVec<[Self::I; 8]>; - /// Get the number of spillslots required for the given register-class and - /// type. - fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32; + /// Get the number of spillslots required for the given register-class. + fn get_number_of_spillslots_for_value(rc: RegClass) -> u32; /// Get the current virtual-SP offset from an instruction-emission state. fn get_virtual_sp_offset_from_state(s: &::State) -> i64; @@ -658,6 +657,17 @@ fn get_special_purpose_param_register( } } +fn ty_from_class(class: RegClass) -> Type { + match class { + RegClass::I32 => I32, + RegClass::I64 => I64, + RegClass::F32 => F32, + RegClass::F64 => F64, + RegClass::V128 => I8X16, + _ => panic!("Unknown regclass: {:?}", class), + } +} + impl ABICalleeImpl { /// Create a new body ABI instance. pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { @@ -856,26 +866,6 @@ fn generate_gv( } } -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I32/I64 (integer) registers. - (None, rc) if rc == M::word_reg_class() => M::word_type(), - _ => panic!("Unexpected register class!"), - } -} - fn gen_load_stack_multi( from: StackAMode, dst: ValueRegs>, @@ -1203,14 +1193,6 @@ impl ABICallee for ABICalleeImpl { let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - // Integer types smaller than word size have been spilled as words below, - // and therefore must be reloaded in the same type. - let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { - M::word_type() - } else { - ty - }; - gen_load_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty) } @@ -1227,18 +1209,6 @@ impl ABICallee for ABICalleeImpl { let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - // When reloading from a spill slot, we might have lost information about real integer - // types. For instance, on the x64 backend, a zero-extension can become spurious and - // optimized into a move, causing vregs of types I32 and I64 to share the same coalescing - // equivalency class. As a matter of fact, such a value can be spilled as an I32 and later - // reloaded as an I64; to make sure the high bits are always defined, do a word-sized store - // all the time, in this case. - let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { - M::word_type() - } else { - ty - }; - gen_store_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty) } @@ -1383,25 +1353,20 @@ impl ABICallee for ABICalleeImpl { self.sig.stack_arg_space as u32 } - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - M::get_number_of_spillslots_for_value(rc, ty) + fn get_spillslot_size(&self, rc: RegClass) -> u32 { + M::get_number_of_spillslots_for_value(rc) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I { - let ty = ty_from_ty_hint_or_reg_class::(from_reg.to_reg(), ty); + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { + let ty = ty_from_class(from_reg.to_reg().get_class()); self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg())) .into_iter() .next() .unwrap() } - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Self::I { - let ty = ty_from_ty_hint_or_reg_class::(to_reg.to_reg().to_reg(), ty); + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot) -> Self::I { + let ty = ty_from_class(to_reg.to_reg().get_class()); self.load_spillslot( from_slot, ty, diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 96a1acc909..0e8300f49d 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -688,24 +688,21 @@ impl RegallocFunction for VCode { self.vreg_types.len() } - fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 { - let ty = self.vreg_type(vreg); - self.abi.get_spillslot_size(regclass, ty) + fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 { + self.abi.get_spillslot_size(regclass) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option) -> I { - let ty = vreg.map(|v| self.vreg_type(v)); - self.abi.gen_spill(to_slot, from_reg, ty) + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option) -> I { + self.abi.gen_spill(to_slot, from_reg) } fn gen_reload( &self, to_reg: Writable, from_slot: SpillSlot, - vreg: Option, + _: Option, ) -> I { - let ty = vreg.map(|v| self.vreg_type(v)); - self.abi.gen_reload(to_reg, from_slot, ty) + self.abi.gen_reload(to_reg, from_slot) } fn gen_move(&self, to_reg: Writable, from_reg: RealReg, vreg: VirtualReg) -> I { diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index dcbf1edbc4..21ba70c987 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -133,28 +133,28 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: sub sp, sp, #32 +; nextln: sub sp, sp, #48 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str s0, [sp] +; nextln: str q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str d0, [sp, #8] +; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str d0, [sp, #16] +; nextln: str q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr s0, [sp] +; nextln: ldr q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #8] +; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #16] +; nextln: ldr q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: add sp, sp, #32 +; nextln: add sp, sp, #48 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -223,28 +223,28 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: sub sp, sp, #32 +; nextln: sub sp, sp, #48 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: str s0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str d0, [sp, #8] +; nextln: str q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 ; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr s0, [sp] +; nextln: str q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr d0, [sp, #8] +; nextln: ldr q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 ; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: add sp, sp, #32 +; nextln: ldr q0, [sp, #32] +; nextln: ldr x0, 8 ; b 12 ; data +; nextln: blr x0 +; nextln: add sp, sp, #48 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 2081bf5ebb..bead5b77f4 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -238,34 +238,34 @@ block0(v0: i64): ; nextln: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; nextln: movq %rsp, %rbp ; nextln: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 } -; nextln: subq $$192, %rsp -; nextln: movdqu %xmm6, 32(%rsp) +; nextln: subq $$224, %rsp +; nextln: movdqu %xmm6, 64(%rsp) ; nextln: unwind SaveReg { clobber_offset: 0, reg: r6V } -; nextln: movdqu %xmm7, 48(%rsp) +; nextln: movdqu %xmm7, 80(%rsp) ; nextln: unwind SaveReg { clobber_offset: 16, reg: r7V } -; nextln: movdqu %xmm8, 64(%rsp) +; nextln: movdqu %xmm8, 96(%rsp) ; nextln: unwind SaveReg { clobber_offset: 32, reg: r8V } -; nextln: movdqu %xmm9, 80(%rsp) +; nextln: movdqu %xmm9, 112(%rsp) ; nextln: unwind SaveReg { clobber_offset: 48, reg: r9V } -; nextln: movdqu %xmm10, 96(%rsp) +; nextln: movdqu %xmm10, 128(%rsp) ; nextln: unwind SaveReg { clobber_offset: 64, reg: r10V } -; nextln: movdqu %xmm11, 112(%rsp) +; nextln: movdqu %xmm11, 144(%rsp) ; nextln: unwind SaveReg { clobber_offset: 80, reg: r11V } -; nextln: movdqu %xmm12, 128(%rsp) +; nextln: movdqu %xmm12, 160(%rsp) ; nextln: unwind SaveReg { clobber_offset: 96, reg: r12V } -; nextln: movdqu %xmm13, 144(%rsp) +; nextln: movdqu %xmm13, 176(%rsp) ; nextln: unwind SaveReg { clobber_offset: 112, reg: r13V } -; nextln: movdqu %xmm14, 160(%rsp) +; nextln: movdqu %xmm14, 192(%rsp) ; nextln: unwind SaveReg { clobber_offset: 128, reg: r14V } -; nextln: movdqu %xmm15, 176(%rsp) +; nextln: movdqu %xmm15, 208(%rsp) ; nextln: unwind SaveReg { clobber_offset: 144, reg: r15V } ; nextln: movsd 0(%rcx), %xmm4 ; nextln: movsd 8(%rcx), %xmm1 ; nextln: movsd 16(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(16 + virtual offset) +; nextln: movdqu %xmm0, rsp(32 + virtual offset) ; nextln: movsd 24(%rcx), %xmm3 ; nextln: movsd 32(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(24 + virtual offset) +; nextln: movdqu %xmm0, rsp(48 + virtual offset) ; nextln: movsd 40(%rcx), %xmm5 ; nextln: movsd 48(%rcx), %xmm6 ; nextln: movsd 56(%rcx), %xmm7 @@ -278,24 +278,24 @@ block0(v0: i64): ; nextln: movsd 112(%rcx), %xmm14 ; nextln: movsd 120(%rcx), %xmm15 ; nextln: movsd 128(%rcx), %xmm0 -; nextln: movsd %xmm0, rsp(0 + virtual offset) +; nextln: movdqu %xmm0, rsp(0 + virtual offset) ; nextln: movsd 136(%rcx), %xmm0 ; nextln: movsd 144(%rcx), %xmm2 -; nextln: movsd %xmm2, rsp(8 + virtual offset) +; nextln: movdqu %xmm2, rsp(16 + virtual offset) ; nextln: movsd 152(%rcx), %xmm2 ; nextln: addsd %xmm1, %xmm4 -; nextln: movsd rsp(16 + virtual offset), %xmm1 +; nextln: movdqu rsp(32 + virtual offset), %xmm1 ; nextln: addsd %xmm3, %xmm1 -; nextln: movsd rsp(24 + virtual offset), %xmm3 +; nextln: movdqu rsp(48 + virtual offset), %xmm3 ; nextln: addsd %xmm5, %xmm3 ; nextln: addsd %xmm7, %xmm6 ; nextln: addsd %xmm9, %xmm8 ; nextln: addsd %xmm11, %xmm10 ; nextln: addsd %xmm13, %xmm12 ; nextln: addsd %xmm15, %xmm14 -; nextln: movsd rsp(0 + virtual offset), %xmm5 +; nextln: movdqu rsp(0 + virtual offset), %xmm5 ; nextln: addsd %xmm0, %xmm5 -; nextln: movsd rsp(8 + virtual offset), %xmm0 +; nextln: movdqu rsp(16 + virtual offset), %xmm0 ; nextln: addsd %xmm2, %xmm0 ; nextln: addsd %xmm1, %xmm4 ; nextln: addsd %xmm6, %xmm3 @@ -307,17 +307,17 @@ block0(v0: i64): ; nextln: addsd %xmm8, %xmm4 ; nextln: addsd %xmm5, %xmm4 ; nextln: movaps %xmm4, %xmm0 -; nextln: movdqu 32(%rsp), %xmm6 -; nextln: movdqu 48(%rsp), %xmm7 -; nextln: movdqu 64(%rsp), %xmm8 -; nextln: movdqu 80(%rsp), %xmm9 -; nextln: movdqu 96(%rsp), %xmm10 -; nextln: movdqu 112(%rsp), %xmm11 -; nextln: movdqu 128(%rsp), %xmm12 -; nextln: movdqu 144(%rsp), %xmm13 -; nextln: movdqu 160(%rsp), %xmm14 -; nextln: movdqu 176(%rsp), %xmm15 -; nextln: addq $$192, %rsp +; nextln: movdqu 64(%rsp), %xmm6 +; nextln: movdqu 80(%rsp), %xmm7 +; nextln: movdqu 96(%rsp), %xmm8 +; nextln: movdqu 112(%rsp), %xmm9 +; nextln: movdqu 128(%rsp), %xmm10 +; nextln: movdqu 144(%rsp), %xmm11 +; nextln: movdqu 160(%rsp), %xmm12 +; nextln: movdqu 176(%rsp), %xmm13 +; nextln: movdqu 192(%rsp), %xmm14 +; nextln: movdqu 208(%rsp), %xmm15 +; nextln: addq $$224, %rsp ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret diff --git a/tests/misc_testsuite/simd/spillslot-size-fuzzbug.wast b/tests/misc_testsuite/simd/spillslot-size-fuzzbug.wast new file mode 100644 index 0000000000..287b927447 --- /dev/null +++ b/tests/misc_testsuite/simd/spillslot-size-fuzzbug.wast @@ -0,0 +1,11 @@ +(module + (func (export "test") (result f32 f32) + i32.const 0 + f32.convert_i32_s + v128.const i32x4 0 0 0 0 + data.drop 0 + f32x4.extract_lane 0 + data.drop 0) + (data "")) + +(assert_return (invoke "test") (f32.const 0.0) (f32.const 0.0))