Don't merge loads for xmm registers (#4891)

Do not merge loads into instructions that operate on XMM registers: those instructions' memory operands carry alignment requirements (full-width legacy SSE operands must be 16-byte aligned) that CLIF lowered from Wasm does not currently guarantee.

Fixes #4890
Author:    Trevor Elliott
Date:      2022-09-12 10:14:35 -07:00
Committed: via GitHub
Commit:    ad09c273c6 (parent 555309a480)

3 changed files with 57 additions and 48 deletions
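
Background on the failure mode: when a load is merged, the memory operand lands directly inside a legacy SSE instruction (f32.copysign, for instance, lowers to bitwise XMM ops), and full-width legacy SSE memory operands must be 16-byte aligned, while Wasm loads may sit at any byte offset. A minimal standalone sketch of that hardware distinction using std::arch intrinsics; illustration only, not part of this change:

#[cfg(target_arch = "x86_64")]
fn sse_alignment_demo() {
    use std::arch::x86_64::{_mm_load_ps, _mm_loadu_ps};

    // 16-byte aligned backing storage, then a deliberately misaligned view,
    // analogous to what `f32.load offset=1` produces.
    #[repr(align(16))]
    struct Aligned([f32; 8]);
    let buf = Aligned([0.0; 8]);
    let misaligned = unsafe { buf.0.as_ptr().cast::<u8>().add(1).cast::<f32>() };

    unsafe {
        // Explicit unaligned load (`movups`-style): legal at any address.
        let _ok = _mm_loadu_ps(misaligned);
        // Aligned load (`movaps`-style), the shape a merged memory operand
        // takes: faults at runtime on a misaligned address. Left commented
        // out so the demo runs.
        // let _boom = _mm_load_ps(misaligned);
    }
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    sse_alignment_demo();
}

Uncommenting the `_mm_load_ps` line triggers the same class of fault as #4890.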

@@ -166,14 +166,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
                 .unwrap();
         }
 
-        if let InputSourceInst::UniqueUse(src_insn, 0) = inputs.inst {
-            if let Some((addr_input, offset)) = is_mergeable_load(self.lower_ctx, src_insn) {
-                self.lower_ctx.sink_inst(src_insn);
-                let amode = lower_to_amode(self.lower_ctx, addr_input, offset);
-                return XmmMem::new(RegMem::mem(amode)).unwrap();
-            }
-        }
-
         XmmMem::new(RegMem::reg(self.put_in_reg(val))).unwrap()
     }
 
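
The deleted branch was the only place XMM lowering sank loads into instructions; with it gone, the fallback below it always materializes the value in a register first. A toy model of the decision this commit changes, runnable on its own; the type and function names here are invented for illustration and are not Cranelift APIs:

/// Toy model (not Cranelift code) of the XMM operand choice: a load may be
/// folded into the instruction only if its alignment provably satisfies the
/// instruction's memory-operand requirement.
#[derive(Debug, PartialEq)]
enum XmmOperand {
    Reg,                 // force the value into a register first
    Mem { offset: i32 }, // fold the load into the instruction
}

fn lower_xmm_operand(load_align: u32, offset: i32, required_align: u32) -> XmmOperand {
    // After this commit the `Mem` arm is effectively unreachable for XMM
    // instructions; before it, loads were folded without any such check.
    if load_align >= required_align {
        XmmOperand::Mem { offset }
    } else {
        XmmOperand::Reg
    }
}

fn main() {
    // Wasm only guarantees byte alignment, so folding is never safe here.
    assert_eq!(lower_xmm_operand(1, 152, 16), XmmOperand::Reg);
    assert_eq!(lower_xmm_operand(16, 152, 16), XmmOperand::Mem { offset: 152 });
}

Re-enabling the `Mem` arm for XMM instructions would require the lowering to prove the load's alignment, which is exactly the guarantee the commit message says CLIF lowered from Wasm currently lacks.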

@@ -221,42 +221,44 @@ block0(v0: i64):
 ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
 ; movq %rsp, %rbp
 ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
-; subq %rsp, $224, %rsp
-; movdqu %xmm6, 64(%rsp)
+; subq %rsp, $256, %rsp
+; movdqu %xmm6, 96(%rsp)
 ; unwind SaveReg { clobber_offset: 0, reg: p6f }
-; movdqu %xmm7, 80(%rsp)
+; movdqu %xmm7, 112(%rsp)
 ; unwind SaveReg { clobber_offset: 16, reg: p7f }
-; movdqu %xmm8, 96(%rsp)
+; movdqu %xmm8, 128(%rsp)
 ; unwind SaveReg { clobber_offset: 32, reg: p8f }
-; movdqu %xmm9, 112(%rsp)
+; movdqu %xmm9, 144(%rsp)
 ; unwind SaveReg { clobber_offset: 48, reg: p9f }
-; movdqu %xmm10, 128(%rsp)
+; movdqu %xmm10, 160(%rsp)
 ; unwind SaveReg { clobber_offset: 64, reg: p10f }
-; movdqu %xmm11, 144(%rsp)
+; movdqu %xmm11, 176(%rsp)
 ; unwind SaveReg { clobber_offset: 80, reg: p11f }
-; movdqu %xmm12, 160(%rsp)
+; movdqu %xmm12, 192(%rsp)
 ; unwind SaveReg { clobber_offset: 96, reg: p12f }
-; movdqu %xmm13, 176(%rsp)
+; movdqu %xmm13, 208(%rsp)
 ; unwind SaveReg { clobber_offset: 112, reg: p13f }
-; movdqu %xmm14, 192(%rsp)
+; movdqu %xmm14, 224(%rsp)
 ; unwind SaveReg { clobber_offset: 128, reg: p14f }
-; movdqu %xmm15, 208(%rsp)
+; movdqu %xmm15, 240(%rsp)
 ; unwind SaveReg { clobber_offset: 144, reg: p15f }
 ; block0:
 ; movsd 0(%rcx), %xmm0
 ; movsd 8(%rcx), %xmm11
-; movdqu %xmm11, rsp(48 + virtual offset)
-; movsd 16(%rcx), %xmm6
+; movdqu %xmm11, rsp(80 + virtual offset)
+; movsd 16(%rcx), %xmm3
+; movdqu %xmm3, rsp(0 + virtual offset)
 ; movsd 24(%rcx), %xmm15
-; movdqu %xmm15, rsp(32 + virtual offset)
+; movdqu %xmm15, rsp(64 + virtual offset)
 ; movsd 32(%rcx), %xmm14
 ; movsd 40(%rcx), %xmm1
-; movdqu %xmm1, rsp(16 + virtual offset)
+; movdqu %xmm1, rsp(48 + virtual offset)
 ; movsd 48(%rcx), %xmm8
-; movsd 56(%rcx), %xmm9
-; movdqu %xmm9, rsp(0 + virtual offset)
+; movsd 56(%rcx), %xmm6
+; movdqu %xmm6, rsp(32 + virtual offset)
 ; movsd 64(%rcx), %xmm13
-; movsd 72(%rcx), %xmm3
+; movsd 72(%rcx), %xmm5
+; movdqu %xmm5, rsp(16 + virtual offset)
 ; movsd 80(%rcx), %xmm10
 ; movsd 88(%rcx), %xmm5
 ; movsd 96(%rcx), %xmm4
@@ -266,21 +268,24 @@ block0(v0: i64):
 ; movsd 128(%rcx), %xmm7
 ; movsd 136(%rcx), %xmm15
 ; movsd 144(%rcx), %xmm2
-; movdqu rsp(48 + virtual offset), %xmm1
-; addsd %xmm0, %xmm1, %xmm0
-; movdqu rsp(32 + virtual offset), %xmm1
-; addsd %xmm6, %xmm1, %xmm6
-; movdqu rsp(16 + virtual offset), %xmm1
-; addsd %xmm14, %xmm1, %xmm14
-; movdqu rsp(0 + virtual offset), %xmm1
-; addsd %xmm8, %xmm1, %xmm8
-; addsd %xmm13, %xmm3, %xmm13
+; movsd 152(%rcx), %xmm1
+; movdqu rsp(80 + virtual offset), %xmm3
+; addsd %xmm0, %xmm3, %xmm0
+; movdqu rsp(0 + virtual offset), %xmm3
+; movdqu rsp(64 + virtual offset), %xmm6
+; addsd %xmm3, %xmm6, %xmm3
+; movdqu rsp(48 + virtual offset), %xmm6
+; addsd %xmm14, %xmm6, %xmm14
+; movdqu rsp(32 + virtual offset), %xmm6
+; addsd %xmm8, %xmm6, %xmm8
+; movdqu rsp(16 + virtual offset), %xmm6
+; addsd %xmm13, %xmm6, %xmm13
 ; addsd %xmm10, %xmm5, %xmm10
 ; addsd %xmm4, %xmm9, %xmm4
 ; addsd %xmm12, %xmm11, %xmm12
 ; addsd %xmm7, %xmm15, %xmm7
-; addsd %xmm2, 152(%rcx), %xmm2
-; addsd %xmm0, %xmm6, %xmm0
+; addsd %xmm2, %xmm1, %xmm2
+; addsd %xmm0, %xmm3, %xmm0
 ; addsd %xmm14, %xmm8, %xmm14
 ; addsd %xmm13, %xmm10, %xmm13
 ; addsd %xmm4, %xmm12, %xmm4
@@ -289,17 +294,17 @@ block0(v0: i64):
 ; addsd %xmm13, %xmm4, %xmm13
 ; addsd %xmm0, %xmm13, %xmm0
 ; addsd %xmm0, %xmm7, %xmm0
-; movdqu 64(%rsp), %xmm6
-; movdqu 80(%rsp), %xmm7
-; movdqu 96(%rsp), %xmm8
-; movdqu 112(%rsp), %xmm9
-; movdqu 128(%rsp), %xmm10
-; movdqu 144(%rsp), %xmm11
-; movdqu 160(%rsp), %xmm12
-; movdqu 176(%rsp), %xmm13
-; movdqu 192(%rsp), %xmm14
-; movdqu 208(%rsp), %xmm15
-; addq %rsp, $224, %rsp
+; movdqu 96(%rsp), %xmm6
+; movdqu 112(%rsp), %xmm7
+; movdqu 128(%rsp), %xmm8
+; movdqu 144(%rsp), %xmm9
+; movdqu 160(%rsp), %xmm10
+; movdqu 176(%rsp), %xmm11
+; movdqu 192(%rsp), %xmm12
+; movdqu 208(%rsp), %xmm13
+; movdqu 224(%rsp), %xmm14
+; movdqu 240(%rsp), %xmm15
+; addq %rsp, $256, %rsp
 ; movq %rbp, %rsp
 ; popq %rbp
 ; ret
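
The key change in this expected output is that the merged `addsd %xmm2, 152(%rcx), %xmm2` becomes a separate `movsd 152(%rcx), %xmm1` followed by a register-register `addsd`. The larger frame follows directly from that: with no loads folded into the addsd instructions, more values are live in registers at once, and regalloc needs two extra 16-byte spill slots (virtual offsets now run 0..80 rather than 0..48):

    old frame: 4 spill slots x 16 + 10 clobbered XMM regs x 16 = 64 + 160 = 224 bytes
    new frame: 6 spill slots x 16 + 10 clobbered XMM regs x 16 = 96 + 160 = 256 bytes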

@@ -0,0 +1,12 @@
+(module
+  (func (param i32) (result f32)
+    f32.const 0
+    local.get 0
+    f32.load offset=1
+    f32.copysign
+  )
+  (memory 1)
+  (export "f" (func 0))
+)
+
+(assert_return (invoke "f" (i32.const 0)) (f32.const 0))
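
The new test targets the bug directly: `offset=1` forces a misaligned effective address, and `f32.copysign` is an operation whose SSE lowering previously had the load merged into it. A sketch of exercising the same module through the wasmtime embedding API (API names assumed from the 1.0-era crate; the committed test actually runs through the .wast harness):

use wasmtime::{Engine, Instance, Module, Store};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let engine = Engine::default();
    // Same module as the new .wast test, with the export written inline.
    let module = Module::new(
        &engine,
        r#"(module
             (func (export "f") (param i32) (result f32)
               f32.const 0
               local.get 0
               f32.load offset=1
               f32.copysign)
             (memory 1))"#,
    )?;
    let mut store = Store::new(&engine, ());
    let instance = Instance::new(&mut store, &module, &[])?;
    let f = instance.get_typed_func::<i32, f32>(&mut store, "f")?;
    // Linear memory starts zeroed, so the result is copysign(0, +0) == 0;
    // before this fix the merged, misaligned load could fault instead.
    assert_eq!(f.call(&mut store, 0)?, 0.0);
    Ok(())
}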