x64: Enable load-coalescing for SSE/AVX instructions (#5841)
* x64: Enable load-coalescing for SSE/AVX instructions

This commit unlocks the ability to fold loads into operands of SSE and AVX instructions. When it happens this reduces function size and can also reduce register pressure. Previously this was not done because most SSE instructions require memory operands to be aligned; AVX instructions, however, have no alignment requirements.

The solution implemented here is the one recommended by Chris: add a new `XmmMemAligned` newtype wrapper around `XmmMem`. All SSE instructions are now annotated as requiring an `XmmMemAligned` operand, except for a few new instruction styles used specifically for instructions that don't require alignment (e.g. `movdqu`, `*sd`, and `*ss` instructions). All existing instruction helpers continue to take `XmmMem`, however. This way, if an AVX lowering is chosen the operand can be used as-is. If an SSE lowering is chosen, an automatic conversion from `XmmMem` to `XmmMemAligned` kicks in. This conversion only fails for unaligned addresses, in which case a load instruction is emitted and the operand becomes a temporary register instead. A number of prior `Xmm` arguments have been converted to `XmmMem` as well.

One behavior change from this commit is that loading an unaligned operand for an SSE instruction previously used the "correct type" of load, e.g. `movups` for f32x4 or `movupd` for f64x2, but the loading now happens in a context without type information, so a `movdqu` instruction is generated instead. According to [this Stack Overflow question][question] modern processors won't penalize this "wrong" choice of type when the operand is then used by f32- or f64-oriented instructions.

Finally this commit improves reuse of logic in the `put_in_*_mem*` helper to share code with `sinkable_load` and avoid duplication. With this in place various ISLE rules have been updated as well. In the tests it can be seen that AVX instructions are now automatically load-coalesced and use memory operands in a few cases.

[question]: https://stackoverflow.com/questions/40854819/is-there-any-situation-where-using-movdqu-and-movupd-is-better-than-movups

* Fix tests

* Fix move-and-extend to be unaligned

Unlike other XMM instructions, these don't have alignment requirements either. Additionally, add some ISA tests to ensure that their output is checked.

* Review comments
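To make the `XmmMem` → `XmmMemAligned` fallback concrete, here is a minimal, self-contained Rust sketch of the idea. The type and helper names (`XmmMem`, `XmmMemAligned`, `emit_movdqu_load`, `to_aligned`) mirror the terminology above but are simplified stand-ins for illustration, not Cranelift's actual backend types or ISLE constructors, and the "emit" step just prints the instruction it would generate.

```rust
// Sketch of the SSE-only fallback: a memory operand is kept only when it is
// known to be 16-byte aligned; otherwise it is loaded into a temporary via
// movdqu and the register is used instead. AVX lowerings skip this entirely.

#[derive(Clone, Copy, Debug)]
enum XmmMem {
    /// Operand is already in an XMM register (register number for the demo).
    Reg(u8),
    /// Operand is in memory; `align` is the known alignment of the address.
    Mem { addr: u64, align: u64 },
}

/// Newtype whose only constructor is `to_aligned`, so SSE instructions that
/// demand it can never observe an unaligned memory operand.
struct XmmMemAligned(XmmMem);

/// Stand-in for emitting `movdqu` into a fresh temporary register.
fn emit_movdqu_load(addr: u64, next_tmp: &mut u8) -> u8 {
    let tmp = *next_tmp;
    *next_tmp += 1;
    println!("movdqu {addr:#x}, %xmm{tmp}");
    tmp
}

/// Automatic conversion used on the SSE path.
fn to_aligned(op: XmmMem, next_tmp: &mut u8) -> XmmMemAligned {
    match op {
        XmmMem::Mem { addr, align } if align < 16 => {
            XmmMemAligned(XmmMem::Reg(emit_movdqu_load(addr, next_tmp)))
        }
        other => XmmMemAligned(other),
    }
}

fn main() {
    let mut next_tmp = 8;
    let operand = XmmMem::Mem { addr: 0x1004, align: 4 };

    // AVX lowering: the (possibly unaligned) memory operand is used as-is.
    println!("AVX uses {operand:?} directly");

    // SSE lowering: the same operand is forced through the conversion,
    // which here falls back to a movdqu into a temporary register.
    let XmmMemAligned(sse_operand) = to_aligned(operand, &mut next_tmp);
    println!("SSE uses {sse_operand:?}");
}
```

The point of the newtype in this sketch is that instruction constructors taking `XmmMemAligned` cannot be handed a raw unaligned address, while constructors taking plain `XmmMem` remain usable unchanged by the AVX lowerings.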
@@ -333,44 +333,42 @@ block0(v0: i64):
; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 }
; movq %rsp, %rbp
; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 }
; subq %rsp, $256, %rsp
; movdqu %xmm6, 96(%rsp)
; subq %rsp, $224, %rsp
; movdqu %xmm6, 64(%rsp)
; unwind SaveReg { clobber_offset: 0, reg: p6f }
; movdqu %xmm7, 112(%rsp)
; movdqu %xmm7, 80(%rsp)
; unwind SaveReg { clobber_offset: 16, reg: p7f }
; movdqu %xmm8, 128(%rsp)
; movdqu %xmm8, 96(%rsp)
; unwind SaveReg { clobber_offset: 32, reg: p8f }
; movdqu %xmm9, 144(%rsp)
; movdqu %xmm9, 112(%rsp)
; unwind SaveReg { clobber_offset: 48, reg: p9f }
; movdqu %xmm10, 160(%rsp)
; movdqu %xmm10, 128(%rsp)
; unwind SaveReg { clobber_offset: 64, reg: p10f }
; movdqu %xmm11, 176(%rsp)
; movdqu %xmm11, 144(%rsp)
; unwind SaveReg { clobber_offset: 80, reg: p11f }
; movdqu %xmm12, 192(%rsp)
; movdqu %xmm12, 160(%rsp)
; unwind SaveReg { clobber_offset: 96, reg: p12f }
; movdqu %xmm13, 208(%rsp)
; movdqu %xmm13, 176(%rsp)
; unwind SaveReg { clobber_offset: 112, reg: p13f }
; movdqu %xmm14, 224(%rsp)
; movdqu %xmm14, 192(%rsp)
; unwind SaveReg { clobber_offset: 128, reg: p14f }
; movdqu %xmm15, 240(%rsp)
; movdqu %xmm15, 208(%rsp)
; unwind SaveReg { clobber_offset: 144, reg: p15f }
; block0:
; movsd 0(%rcx), %xmm0
; movsd 8(%rcx), %xmm10
; movdqu %xmm10, rsp(80 + virtual offset)
; movsd 16(%rcx), %xmm2
; movdqu %xmm2, rsp(0 + virtual offset)
; movdqu %xmm10, rsp(48 + virtual offset)
; movsd 16(%rcx), %xmm5
; movsd 24(%rcx), %xmm14
; movdqu %xmm14, rsp(64 + virtual offset)
; movdqu %xmm14, rsp(32 + virtual offset)
; movsd 32(%rcx), %xmm13
; movsd 40(%rcx), %xmm15
; movdqu %xmm15, rsp(48 + virtual offset)
; movdqu %xmm15, rsp(16 + virtual offset)
; movsd 48(%rcx), %xmm7
; movsd 56(%rcx), %xmm5
; movdqu %xmm5, rsp(32 + virtual offset)
; movsd 56(%rcx), %xmm8
; movdqu %xmm8, rsp(0 + virtual offset)
; movsd 64(%rcx), %xmm12
; movsd 72(%rcx), %xmm4
; movdqu %xmm4, rsp(16 + virtual offset)
; movsd 72(%rcx), %xmm2
; movsd 80(%rcx), %xmm9
; movsd 88(%rcx), %xmm4
; movsd 96(%rcx), %xmm3
@@ -380,24 +378,21 @@ block0(v0: i64):
; movsd 128(%rcx), %xmm6
; movsd 136(%rcx), %xmm14
; movsd 144(%rcx), %xmm1
; movsd 152(%rcx), %xmm15
; movdqu rsp(80 + virtual offset), %xmm2
; addsd %xmm0, %xmm2, %xmm0
; movdqu rsp(0 + virtual offset), %xmm2
; movdqu rsp(64 + virtual offset), %xmm5
; addsd %xmm2, %xmm5, %xmm2
; movdqu rsp(48 + virtual offset), %xmm5
; addsd %xmm13, %xmm5, %xmm13
; movdqu rsp(32 + virtual offset), %xmm5
; addsd %xmm7, %xmm5, %xmm7
; movdqu rsp(16 + virtual offset), %xmm5
; addsd %xmm12, %xmm5, %xmm12
; movdqu rsp(48 + virtual offset), %xmm15
; addsd %xmm0, %xmm15, %xmm0
; movdqu rsp(32 + virtual offset), %xmm15
; addsd %xmm5, %xmm15, %xmm5
; movdqu rsp(16 + virtual offset), %xmm15
; addsd %xmm13, %xmm15, %xmm13
; movdqu rsp(0 + virtual offset), %xmm15
; addsd %xmm7, %xmm15, %xmm7
; addsd %xmm12, %xmm2, %xmm12
; addsd %xmm9, %xmm4, %xmm9
; addsd %xmm3, %xmm8, %xmm3
; addsd %xmm11, %xmm10, %xmm11
; addsd %xmm6, %xmm14, %xmm6
; addsd %xmm1, %xmm15, %xmm1
; addsd %xmm0, %xmm2, %xmm0
; addsd %xmm1, 152(%rcx), %xmm1
; addsd %xmm0, %xmm5, %xmm0
; addsd %xmm13, %xmm7, %xmm13
; addsd %xmm12, %xmm9, %xmm12
; addsd %xmm3, %xmm11, %xmm3
@@ -406,17 +401,17 @@ block0(v0: i64):
; addsd %xmm12, %xmm3, %xmm12
; addsd %xmm0, %xmm12, %xmm0
; addsd %xmm0, %xmm6, %xmm0
; movdqu 96(%rsp), %xmm6
; movdqu 112(%rsp), %xmm7
; movdqu 128(%rsp), %xmm8
; movdqu 144(%rsp), %xmm9
; movdqu 160(%rsp), %xmm10
; movdqu 176(%rsp), %xmm11
; movdqu 192(%rsp), %xmm12
; movdqu 208(%rsp), %xmm13
; movdqu 224(%rsp), %xmm14
; movdqu 240(%rsp), %xmm15
; addq %rsp, $256, %rsp
; movdqu 64(%rsp), %xmm6
; movdqu 80(%rsp), %xmm7
; movdqu 96(%rsp), %xmm8
; movdqu 112(%rsp), %xmm9
; movdqu 128(%rsp), %xmm10
; movdqu 144(%rsp), %xmm11
; movdqu 160(%rsp), %xmm12
; movdqu 176(%rsp), %xmm13
; movdqu 192(%rsp), %xmm14
; movdqu 208(%rsp), %xmm15
; addq %rsp, $224, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -425,34 +420,32 @@ block0(v0: i64):
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x100, %rsp
; movdqu %xmm6, 0x60(%rsp)
; movdqu %xmm7, 0x70(%rsp)
; movdqu %xmm8, 0x80(%rsp)
; movdqu %xmm9, 0x90(%rsp)
; movdqu %xmm10, 0xa0(%rsp)
; movdqu %xmm11, 0xb0(%rsp)
; movdqu %xmm12, 0xc0(%rsp)
; movdqu %xmm13, 0xd0(%rsp)
; movdqu %xmm14, 0xe0(%rsp)
; movdqu %xmm15, 0xf0(%rsp)
; block1: ; offset 0x67
; subq $0xe0, %rsp
; movdqu %xmm6, 0x40(%rsp)
; movdqu %xmm7, 0x50(%rsp)
; movdqu %xmm8, 0x60(%rsp)
; movdqu %xmm9, 0x70(%rsp)
; movdqu %xmm10, 0x80(%rsp)
; movdqu %xmm11, 0x90(%rsp)
; movdqu %xmm12, 0xa0(%rsp)
; movdqu %xmm13, 0xb0(%rsp)
; movdqu %xmm14, 0xc0(%rsp)
; movdqu %xmm15, 0xd0(%rsp)
; block1: ; offset 0x61
; movsd (%rcx), %xmm0 ; trap: heap_oob
; movsd 8(%rcx), %xmm10 ; trap: heap_oob
; movdqu %xmm10, 0x50(%rsp)
; movsd 0x10(%rcx), %xmm2 ; trap: heap_oob
; movdqu %xmm2, (%rsp)
; movdqu %xmm10, 0x30(%rsp)
; movsd 0x10(%rcx), %xmm5 ; trap: heap_oob
; movsd 0x18(%rcx), %xmm14 ; trap: heap_oob
; movdqu %xmm14, 0x40(%rsp)
; movdqu %xmm14, 0x20(%rsp)
; movsd 0x20(%rcx), %xmm13 ; trap: heap_oob
; movsd 0x28(%rcx), %xmm15 ; trap: heap_oob
; movdqu %xmm15, 0x30(%rsp)
; movdqu %xmm15, 0x10(%rsp)
; movsd 0x30(%rcx), %xmm7 ; trap: heap_oob
; movsd 0x38(%rcx), %xmm5 ; trap: heap_oob
; movdqu %xmm5, 0x20(%rsp)
; movsd 0x38(%rcx), %xmm8 ; trap: heap_oob
; movdqu %xmm8, (%rsp)
; movsd 0x40(%rcx), %xmm12 ; trap: heap_oob
; movsd 0x48(%rcx), %xmm4 ; trap: heap_oob
; movdqu %xmm4, 0x10(%rsp)
; movsd 0x48(%rcx), %xmm2 ; trap: heap_oob
; movsd 0x50(%rcx), %xmm9 ; trap: heap_oob
; movsd 0x58(%rcx), %xmm4 ; trap: heap_oob
; movsd 0x60(%rcx), %xmm3 ; trap: heap_oob
@@ -462,24 +455,21 @@ block0(v0: i64):
; movsd 0x80(%rcx), %xmm6 ; trap: heap_oob
; movsd 0x88(%rcx), %xmm14 ; trap: heap_oob
; movsd 0x90(%rcx), %xmm1 ; trap: heap_oob
; movsd 0x98(%rcx), %xmm15 ; trap: heap_oob
; movdqu 0x50(%rsp), %xmm2
; addsd %xmm2, %xmm0
; movdqu (%rsp), %xmm2
; movdqu 0x40(%rsp), %xmm5
; addsd %xmm5, %xmm2
; movdqu 0x30(%rsp), %xmm5
; addsd %xmm5, %xmm13
; movdqu 0x20(%rsp), %xmm5
; addsd %xmm5, %xmm7
; movdqu 0x10(%rsp), %xmm5
; addsd %xmm5, %xmm12
; movdqu 0x30(%rsp), %xmm15
; addsd %xmm15, %xmm0
; movdqu 0x20(%rsp), %xmm15
; addsd %xmm15, %xmm5
; movdqu 0x10(%rsp), %xmm15
; addsd %xmm15, %xmm13
; movdqu (%rsp), %xmm15
; addsd %xmm15, %xmm7
; addsd %xmm2, %xmm12
; addsd %xmm4, %xmm9
; addsd %xmm8, %xmm3
; addsd %xmm10, %xmm11
; addsd %xmm14, %xmm6
; addsd %xmm15, %xmm1
; addsd %xmm2, %xmm0
; addsd 0x98(%rcx), %xmm1 ; trap: heap_oob
; addsd %xmm5, %xmm0
; addsd %xmm7, %xmm13
; addsd %xmm9, %xmm12
; addsd %xmm11, %xmm3
@@ -488,17 +478,17 @@ block0(v0: i64):
; addsd %xmm3, %xmm12
; addsd %xmm12, %xmm0
; addsd %xmm6, %xmm0
; movdqu 0x60(%rsp), %xmm6
; movdqu 0x70(%rsp), %xmm7
; movdqu 0x80(%rsp), %xmm8
; movdqu 0x90(%rsp), %xmm9
; movdqu 0xa0(%rsp), %xmm10
; movdqu 0xb0(%rsp), %xmm11
; movdqu 0xc0(%rsp), %xmm12
; movdqu 0xd0(%rsp), %xmm13
; movdqu 0xe0(%rsp), %xmm14
; movdqu 0xf0(%rsp), %xmm15
; addq $0x100, %rsp
; movdqu 0x40(%rsp), %xmm6
; movdqu 0x50(%rsp), %xmm7
; movdqu 0x60(%rsp), %xmm8
; movdqu 0x70(%rsp), %xmm9
; movdqu 0x80(%rsp), %xmm10
; movdqu 0x90(%rsp), %xmm11
; movdqu 0xa0(%rsp), %xmm12
; movdqu 0xb0(%rsp), %xmm13
; movdqu 0xc0(%rsp), %xmm14
; movdqu 0xd0(%rsp), %xmm15
; addq $0xe0, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

@@ -13,8 +13,7 @@ block0(v0: f32x4, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movups 0(%rdi), %xmm4
; vorps %xmm0, %xmm4, %xmm0
; vorps %xmm0, 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -24,8 +23,7 @@ block0(v0: f32x4, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movups (%rdi), %xmm4
; vorps %xmm4, %xmm0, %xmm0
; vorps (%rdi), %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -42,12 +40,11 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movss 0(%rdi), %xmm7
; movl $-2147483648, %ecx
; movd %ecx, %xmm5
; vandnps %xmm5, const(0), %xmm8
; vandps %xmm5, %xmm7, %xmm9
; vorps %xmm8, %xmm9, %xmm0
; movl $-2147483648, %eax
; movd %eax, %xmm4
; vandnps %xmm4, const(0), %xmm6
; vandps %xmm4, 0(%rdi), %xmm8
; vorps %xmm6, %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@@ -57,12 +54,11 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movss (%rdi), %xmm7
; movl $0x80000000, %ecx
; movd %ecx, %xmm5
; vandnps 0x17(%rip), %xmm5, %xmm8
; vandps %xmm7, %xmm5, %xmm9
; vorps %xmm9, %xmm8, %xmm0
; movl $0x80000000, %eax
; movd %eax, %xmm4
; vandnps 0x1b(%rip), %xmm4, %xmm6
; vandps (%rdi), %xmm4, %xmm8
; vorps %xmm8, %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -78,6 +74,8 @@ block0(v0: i64):
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)

function %bor_f32x4(f32x4, f32x4) -> f32x4 {
block0(v0: f32x4, v1: f32x4):

cranelift/filetests/filetests/isa/x64/simd-load-extend.clif (new file, 154 lines)
@@ -0,0 +1,154 @@
test compile precise-output
set enable_simd
target x86_64

function %uload8x8(i64) -> i16x8 {
block0(v0: i64):
v1 = uload8x8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxbw 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovzxbw (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %sload8x8(i64) -> i16x8 {
block0(v0: i64):
v1 = sload8x8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxbw 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovsxbw (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %uload16x4(i64) -> i32x4 {
block0(v0: i64):
v1 = uload16x4 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxwd 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovzxwd (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %sload16x4(i64) -> i32x4 {
block0(v0: i64):
v1 = sload16x4 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxwd 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovsxwd (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %uload32x2(i64) -> i64x2 {
block0(v0: i64):
v1 = uload32x2 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovzxdq 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovzxdq (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq

function %sload32x2(i64) -> i64x2 {
block0(v0: i64):
v1 = sload32x2 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pmovsxdq 0(%rdi), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; pmovsxdq (%rdi), %xmm0 ; trap: heap_oob
; movq %rbp, %rsp
; popq %rbp
; retq