Don't merge loads for xmm registers (#4891)
Do not merge loads into memory operands for xmm registers, as the alignment requirements of those instructions currently aren't guaranteed by CLIF lowered from Wasm. Fixes #4890
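Some context on why the merge is unsafe: legacy-SSE instructions that take a memory operand (for example the packed bitwise ops used to implement `copysign`) require that operand to be 16-byte aligned, while the explicit unaligned moves (`movups`, `movdqu`) and the scalar loads (`movss`, `movsd`) accept any address. CLIF produced from Wasm gives no alignment guarantee for loads (the regression test below uses `f32.load offset=1`), so folding such a load into the consuming instruction can fault where a separate load into a register cannot. The snippet below is not from this commit or from Cranelift; it is a small, self-contained Rust sketch of the same alignment distinction using `std::arch` intrinsics, with an `Aligned` wrapper of our own used only to construct a guaranteed-misaligned pointer.

    #[cfg(target_arch = "x86_64")]
    fn main() {
        use std::arch::x86_64::{_mm_and_ps, _mm_loadu_ps, _mm_storeu_ps};

        // Force a known base alignment so that `+ 1` is guaranteed misaligned.
        #[repr(align(16))]
        struct Aligned([u8; 32]);
        let buf = Aligned([0u8; 32]);
        // 16-byte-aligned base + 1 byte: never 16-byte aligned, but still in
        // bounds for a 16-byte read (bytes 1..17 of a 32-byte buffer).
        let misaligned = unsafe { buf.0.as_ptr().add(1) } as *const f32;

        unsafe {
            // Post-change shape: an explicit unaligned load (movups) into a
            // register, then a register-register SSE op. Works at any address.
            let x = _mm_loadu_ps(misaligned);
            let y = _mm_and_ps(x, x);

            let mut out = [0.0f32; 4];
            _mm_storeu_ps(out.as_mut_ptr(), y);
            println!("{out:?}");

            // Pre-change shape, at the machine level: the load folded into the
            // SSE op as a memory operand, which (like `_mm_load_ps` / movaps)
            // requires 16-byte alignment and would fault on this pointer:
            // let z = _mm_and_ps(x, std::arch::x86_64::_mm_load_ps(misaligned));
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn main() {}

The commented-out `_mm_load_ps` call corresponds to the aligned form; executing it on `misaligned` would raise a general-protection fault, which is the failure mode reported in #4890 for the folded memory operand.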
@@ -166,14 +166,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
                 .unwrap();
         }
 
-        if let InputSourceInst::UniqueUse(src_insn, 0) = inputs.inst {
-            if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) {
-                self.lower_ctx.sink_inst(src_insn);
-                let amode = lower_to_amode(self.lower_ctx, addr_input, offset);
-                return XmmMem::new(RegMem::mem(amode)).unwrap();
-            }
-        }
-
         XmmMem::new(RegMem::reg(self.put_in_reg(val))).unwrap()
     }
 
@@ -221,42 +221,44 @@ block0(v0: i64):
 ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
 ; movq %rsp, %rbp
 ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
-; subq %rsp, $224, %rsp
-; movdqu %xmm6, 64(%rsp)
+; subq %rsp, $256, %rsp
+; movdqu %xmm6, 96(%rsp)
 ; unwind SaveReg { clobber_offset: 0, reg: p6f }
-; movdqu %xmm7, 80(%rsp)
+; movdqu %xmm7, 112(%rsp)
 ; unwind SaveReg { clobber_offset: 16, reg: p7f }
-; movdqu %xmm8, 96(%rsp)
+; movdqu %xmm8, 128(%rsp)
 ; unwind SaveReg { clobber_offset: 32, reg: p8f }
-; movdqu %xmm9, 112(%rsp)
+; movdqu %xmm9, 144(%rsp)
 ; unwind SaveReg { clobber_offset: 48, reg: p9f }
-; movdqu %xmm10, 128(%rsp)
+; movdqu %xmm10, 160(%rsp)
 ; unwind SaveReg { clobber_offset: 64, reg: p10f }
-; movdqu %xmm11, 144(%rsp)
+; movdqu %xmm11, 176(%rsp)
 ; unwind SaveReg { clobber_offset: 80, reg: p11f }
-; movdqu %xmm12, 160(%rsp)
+; movdqu %xmm12, 192(%rsp)
 ; unwind SaveReg { clobber_offset: 96, reg: p12f }
-; movdqu %xmm13, 176(%rsp)
+; movdqu %xmm13, 208(%rsp)
 ; unwind SaveReg { clobber_offset: 112, reg: p13f }
-; movdqu %xmm14, 192(%rsp)
+; movdqu %xmm14, 224(%rsp)
 ; unwind SaveReg { clobber_offset: 128, reg: p14f }
-; movdqu %xmm15, 208(%rsp)
+; movdqu %xmm15, 240(%rsp)
 ; unwind SaveReg { clobber_offset: 144, reg: p15f }
 ; block0:
 ; movsd 0(%rcx), %xmm0
 ; movsd 8(%rcx), %xmm11
-; movdqu %xmm11, rsp(48 + virtual offset)
-; movsd 16(%rcx), %xmm6
+; movdqu %xmm11, rsp(80 + virtual offset)
+; movsd 16(%rcx), %xmm3
+; movdqu %xmm3, rsp(0 + virtual offset)
 ; movsd 24(%rcx), %xmm15
-; movdqu %xmm15, rsp(32 + virtual offset)
+; movdqu %xmm15, rsp(64 + virtual offset)
 ; movsd 32(%rcx), %xmm14
 ; movsd 40(%rcx), %xmm1
-; movdqu %xmm1, rsp(16 + virtual offset)
+; movdqu %xmm1, rsp(48 + virtual offset)
 ; movsd 48(%rcx), %xmm8
-; movsd 56(%rcx), %xmm9
-; movdqu %xmm9, rsp(0 + virtual offset)
+; movsd 56(%rcx), %xmm6
+; movdqu %xmm6, rsp(32 + virtual offset)
 ; movsd 64(%rcx), %xmm13
-; movsd 72(%rcx), %xmm3
+; movsd 72(%rcx), %xmm5
+; movdqu %xmm5, rsp(16 + virtual offset)
 ; movsd 80(%rcx), %xmm10
 ; movsd 88(%rcx), %xmm5
 ; movsd 96(%rcx), %xmm4
@@ -266,21 +268,24 @@ block0(v0: i64):
 ; movsd 128(%rcx), %xmm7
 ; movsd 136(%rcx), %xmm15
 ; movsd 144(%rcx), %xmm2
-; movdqu rsp(48 + virtual offset), %xmm1
-; addsd %xmm0, %xmm1, %xmm0
-; movdqu rsp(32 + virtual offset), %xmm1
-; addsd %xmm6, %xmm1, %xmm6
-; movdqu rsp(16 + virtual offset), %xmm1
-; addsd %xmm14, %xmm1, %xmm14
-; movdqu rsp(0 + virtual offset), %xmm1
-; addsd %xmm8, %xmm1, %xmm8
-; addsd %xmm13, %xmm3, %xmm13
+; movsd 152(%rcx), %xmm1
+; movdqu rsp(80 + virtual offset), %xmm3
+; addsd %xmm0, %xmm3, %xmm0
+; movdqu rsp(0 + virtual offset), %xmm3
+; movdqu rsp(64 + virtual offset), %xmm6
+; addsd %xmm3, %xmm6, %xmm3
+; movdqu rsp(48 + virtual offset), %xmm6
+; addsd %xmm14, %xmm6, %xmm14
+; movdqu rsp(32 + virtual offset), %xmm6
+; addsd %xmm8, %xmm6, %xmm8
+; movdqu rsp(16 + virtual offset), %xmm6
+; addsd %xmm13, %xmm6, %xmm13
 ; addsd %xmm10, %xmm5, %xmm10
 ; addsd %xmm4, %xmm9, %xmm4
 ; addsd %xmm12, %xmm11, %xmm12
 ; addsd %xmm7, %xmm15, %xmm7
-; addsd %xmm2, 152(%rcx), %xmm2
-; addsd %xmm0, %xmm6, %xmm0
+; addsd %xmm2, %xmm1, %xmm2
+; addsd %xmm0, %xmm3, %xmm0
 ; addsd %xmm14, %xmm8, %xmm14
 ; addsd %xmm13, %xmm10, %xmm13
 ; addsd %xmm4, %xmm12, %xmm4
@@ -289,17 +294,17 @@ block0(v0: i64):
 ; addsd %xmm13, %xmm4, %xmm13
 ; addsd %xmm0, %xmm13, %xmm0
 ; addsd %xmm0, %xmm7, %xmm0
-; movdqu 64(%rsp), %xmm6
-; movdqu 80(%rsp), %xmm7
-; movdqu 96(%rsp), %xmm8
-; movdqu 112(%rsp), %xmm9
-; movdqu 128(%rsp), %xmm10
-; movdqu 144(%rsp), %xmm11
-; movdqu 160(%rsp), %xmm12
-; movdqu 176(%rsp), %xmm13
-; movdqu 192(%rsp), %xmm14
-; movdqu 208(%rsp), %xmm15
-; addq %rsp, $224, %rsp
+; movdqu 96(%rsp), %xmm6
+; movdqu 112(%rsp), %xmm7
+; movdqu 128(%rsp), %xmm8
+; movdqu 144(%rsp), %xmm9
+; movdqu 160(%rsp), %xmm10
+; movdqu 176(%rsp), %xmm11
+; movdqu 192(%rsp), %xmm12
+; movdqu 208(%rsp), %xmm13
+; movdqu 224(%rsp), %xmm14
+; movdqu 240(%rsp), %xmm15
+; addq %rsp, $256, %rsp
 ; movq %rbp, %rsp
 ; popq %rbp
 ; ret
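One way to read the offset changes in the regenerated expectations above: the clobber-save area for %xmm6 through %xmm15 is unchanged at 10 x 16 = 160 bytes, but with the loads no longer folded into their consumers, more intermediate values appear to be live at once, and the spill area grows from four 16-byte slots (virtual offsets 0 through 48, 64 bytes) to six (virtual offsets 0 through 80, 96 bytes). The frame allocation therefore moves from 160 + 64 = 224 to 160 + 96 = 256 bytes, matching the subq/addq changes.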
tests/misc_testsuite/issue4890.wast (new file, 12 lines)
@@ -0,0 +1,12 @@
+(module
+  (func (param i32) (result f32)
+    f32.const 0
+    local.get 0
+    f32.load offset=1
+    f32.copysign
+  )
+  (memory 1)
+  (export "f" (func 0))
+)
+
+(assert_return (invoke "f" (i32.const 0)) (f32.const 0))