From 1fdeddd0d3d350bbae193eb63d19e7aaaefd6ecc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 18 Sep 2017 18:23:53 -0700 Subject: [PATCH] Add Intel encodings for floating point load/store instructions. Include wasm/*-memory64.cton tests too. --- .../filetests/isa/intel/binary32-float.cton | 56 +++++++++ .../filetests/isa/intel/binary64-float.cton | 56 +++++++++ cranelift/filetests/wasm/f32-memory64.cton | 27 ++++ cranelift/filetests/wasm/f64-memory64.cton | 27 ++++ cranelift/filetests/wasm/i32-memory64.cton | 88 +++++++++++++ cranelift/filetests/wasm/i64-memory64.cton | 117 ++++++++++++++++++ lib/cretonne/meta/isa/intel/encodings.py | 20 +++ lib/cretonne/meta/isa/intel/recipes.py | 57 +++++++++ 8 files changed, 448 insertions(+) create mode 100644 cranelift/filetests/wasm/f32-memory64.cton create mode 100644 cranelift/filetests/wasm/f64-memory64.cton create mode 100644 cranelift/filetests/wasm/i32-memory64.cton create mode 100644 cranelift/filetests/wasm/i64-memory64.cton diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index 5c0dc43b18..3756fbff1e 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -77,6 +77,34 @@ ebb0: ; asm: xorps %xmm5, %xmm2 [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + ; Load/Store + + ; asm: movd (%ecx), %xmm5 + [-,%xmm5] v100 = load.f32 v0 ; bin: 66 0f 6e 29 + ; asm: movd (%esi), %xmm2 + [-,%xmm2] v101 = load.f32 v1 ; bin: 66 0f 6e 16 + ; asm: movd 50(%ecx), %xmm5 + [-,%xmm5] v110 = load.f32 v0+50 ; bin: 66 0f 6e 69 32 + ; asm: movd -50(%esi), %xmm2 + [-,%xmm2] v111 = load.f32 v1-50 ; bin: 66 0f 6e 56 ce + ; asm: movd 10000(%ecx), %xmm5 + [-,%xmm5] v120 = load.f32 v0+10000 ; bin: 66 0f 6e a9 00002710 + ; asm: movd -10000(%esi), %xmm2 + [-,%xmm2] v121 = load.f32 v1-10000 ; bin: 66 0f 6e 96 ffffd8f0 + + ; asm: movd %xmm5, (%ecx) + [-] store.f32 v100, v0 ; bin: 66 0f 7e 29 + ; asm: movd 
%xmm2, (%esi) + [-] store.f32 v101, v1 ; bin: 66 0f 7e 16 + ; asm: movd %xmm5, 50(%ecx) + [-] store.f32 v100, v0+50 ; bin: 66 0f 7e 69 32 + ; asm: movd %xmm2, -50(%esi) + [-] store.f32 v101, v1-50 ; bin: 66 0f 7e 56 ce + ; asm: movd %xmm5, 10000(%ecx) + [-] store.f32 v100, v0+10000 ; bin: 66 0f 7e a9 00002710 + ; asm: movd %xmm2, -10000(%esi) + [-] store.f32 v101, v1-10000 ; bin: 66 0f 7e 96 ffffd8f0 + return } @@ -142,5 +170,33 @@ ebb0: ; asm: xorps %xmm5, %xmm2 [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + ; Load/Store + + ; asm: movq (%ecx), %xmm5 + [-,%xmm5] v100 = load.f64 v0 ; bin: f3 0f 7e 29 + ; asm: movq (%esi), %xmm2 + [-,%xmm2] v101 = load.f64 v1 ; bin: f3 0f 7e 16 + ; asm: movq 50(%ecx), %xmm5 + [-,%xmm5] v110 = load.f64 v0+50 ; bin: f3 0f 7e 69 32 + ; asm: movq -50(%esi), %xmm2 + [-,%xmm2] v111 = load.f64 v1-50 ; bin: f3 0f 7e 56 ce + ; asm: movq 10000(%ecx), %xmm5 + [-,%xmm5] v120 = load.f64 v0+10000 ; bin: f3 0f 7e a9 00002710 + ; asm: movq -10000(%esi), %xmm2 + [-,%xmm2] v121 = load.f64 v1-10000 ; bin: f3 0f 7e 96 ffffd8f0 + + ; asm: movq %xmm5, (%ecx) + [-] store.f64 v100, v0 ; bin: 66 0f d6 29 + ; asm: movq %xmm2, (%esi) + [-] store.f64 v101, v1 ; bin: 66 0f d6 16 + ; asm: movq %xmm5, 50(%ecx) + [-] store.f64 v100, v0+50 ; bin: 66 0f d6 69 32 + ; asm: movq %xmm2, -50(%esi) + [-] store.f64 v101, v1-50 ; bin: 66 0f d6 56 ce + ; asm: movq %xmm5, 10000(%ecx) + [-] store.f64 v100, v0+10000 ; bin: 66 0f d6 a9 00002710 + ; asm: movq %xmm2, -10000(%esi) + [-] store.f64 v101, v1-10000 ; bin: 66 0f d6 96 ffffd8f0 + return } diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index 64dd1ebd05..83aaf6d753 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -85,6 +85,34 @@ ebb0: ; asm: xorps %xmm5, %xmm10 [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 + ; Load/Store + + ; asm: movd (%r14), %xmm5 + [-,%xmm5] v100 = load.f32 
v3 ; bin: 66 41 0f 6e 2e + ; asm: movd (%rax), %xmm10 + [-,%xmm10] v101 = load.f32 v2 ; bin: 66 44 0f 6e 10 + ; asm: movd 50(%r14), %xmm5 + [-,%xmm5] v110 = load.f32 v3+50 ; bin: 66 41 0f 6e 6e 32 + ; asm: movd -50(%rax), %xmm10 + [-,%xmm10] v111 = load.f32 v2-50 ; bin: 66 44 0f 6e 50 ce + ; asm: movd 10000(%r14), %xmm5 + [-,%xmm5] v120 = load.f32 v3+10000 ; bin: 66 41 0f 6e ae 00002710 + ; asm: movd -10000(%rax), %xmm10 + [-,%xmm10] v121 = load.f32 v2-10000 ; bin: 66 44 0f 6e 90 ffffd8f0 + + ; asm: movd %xmm5, (%r14) + [-] store.f32 v100, v3 ; bin: 66 41 0f 7e 2e + ; asm: movd %xmm10, (%rax) + [-] store.f32 v101, v2 ; bin: 66 44 0f 7e 10 + ; asm: movd %xmm5, 50(%r14) + [-] store.f32 v100, v3+50 ; bin: 66 41 0f 7e 6e 32 + ; asm: movd %xmm10, -50(%rax) + [-] store.f32 v101, v2-50 ; bin: 66 44 0f 7e 50 ce + ; asm: movd %xmm5, 10000(%r14) + [-] store.f32 v100, v3+10000 ; bin: 66 41 0f 7e ae 00002710 + ; asm: movd %xmm10, -10000(%rax) + [-] store.f32 v101, v2-10000 ; bin: 66 44 0f 7e 90 ffffd8f0 + return } @@ -165,5 +193,33 @@ ebb0: ; asm: xorps %xmm5, %xmm10 [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 + ; Load/Store + + ; asm: movq (%r14), %xmm5 + [-,%xmm5] v100 = load.f64 v3 ; bin: f3 41 0f 7e 2e + ; asm: movq (%rax), %xmm10 + [-,%xmm10] v101 = load.f64 v2 ; bin: f3 44 0f 7e 10 + ; asm: movq 50(%r14), %xmm5 + [-,%xmm5] v110 = load.f64 v3+50 ; bin: f3 41 0f 7e 6e 32 + ; asm: movq -50(%rax), %xmm10 + [-,%xmm10] v111 = load.f64 v2-50 ; bin: f3 44 0f 7e 50 ce + ; asm: movq 10000(%r14), %xmm5 + [-,%xmm5] v120 = load.f64 v3+10000 ; bin: f3 41 0f 7e ae 00002710 + ; asm: movq -10000(%rax), %xmm10 + [-,%xmm10] v121 = load.f64 v2-10000 ; bin: f3 44 0f 7e 90 ffffd8f0 + + ; asm: movq %xmm5, (%r14) + [-] store.f64 v100, v3 ; bin: 66 41 0f d6 2e + ; asm: movq %xmm10, (%rax) + [-] store.f64 v101, v2 ; bin: 66 44 0f d6 10 + ; asm: movq %xmm5, 50(%r14) + [-] store.f64 v100, v3+50 ; bin: 66 41 0f d6 6e 32 + ; asm: movq %xmm10, -50(%rax) + [-] store.f64 v101, v2-50 ; bin: 66 44 
0f d6 50 ce + ; asm: movq %xmm5, 10000(%r14) + [-] store.f64 v100, v3+10000 ; bin: 66 41 0f d6 ae 00002710 + ; asm: movq %xmm10, -10000(%rax) + [-] store.f64 v101, v2-10000 ; bin: 66 44 0f d6 90 ffffd8f0 + return } diff --git a/cranelift/filetests/wasm/f32-memory64.cton b/cranelift/filetests/wasm/f32-memory64.cton new file mode 100644 index 0000000000..7125e66d3b --- /dev/null +++ b/cranelift/filetests/wasm/f32-memory64.cton @@ -0,0 +1,27 @@ +; Test basic code generation for f32 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. +set is_64bit=1 +isa intel haswell + +function %f32_load(i32, i64 vmctx) -> f32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.f32 v2 + return v3 +} + +function %f32_store(f32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: f32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} diff --git a/cranelift/filetests/wasm/f64-memory64.cton b/cranelift/filetests/wasm/f64-memory64.cton new file mode 100644 index 0000000000..1f61749e51 --- /dev/null +++ b/cranelift/filetests/wasm/f64-memory64.cton @@ -0,0 +1,27 @@ +; Test basic code generation for f64 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. 
+set is_64bit=1 +isa intel haswell + +function %f64_load(i32, i64 vmctx) -> f64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.f64 v2 + return v3 +} + +function %f64_store(f64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: f64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} diff --git a/cranelift/filetests/wasm/i32-memory64.cton b/cranelift/filetests/wasm/i32-memory64.cton new file mode 100644 index 0000000000..0fbffa4fb8 --- /dev/null +++ b/cranelift/filetests/wasm/i32-memory64.cton @@ -0,0 +1,88 @@ +; Test basic code generation for i32 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. +set is_64bit=1 +isa intel haswell + +function %i32_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.i32 v2 + return v3 +} + +function %i32_store(i32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} + +function %i32_load8_s(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload8.i32 v2 + return v3 +} + +function %i32_load8_u(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload8.i32 v2 + return v3 +} + +function %i32_store8(i32, i32, i64 vmctx) { + gv0 = 
vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore8 v0, v3 + return +} + +function %i32_load16_s(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload16.i32 v2 + return v3 +} + +function %i32_load16_u(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload16.i32 v2 + return v3 +} + +function %i32_store16(i32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore16 v0, v3 + return +} + diff --git a/cranelift/filetests/wasm/i64-memory64.cton b/cranelift/filetests/wasm/i64-memory64.cton new file mode 100644 index 0000000000..bc44a2bbea --- /dev/null +++ b/cranelift/filetests/wasm/i64-memory64.cton @@ -0,0 +1,117 @@ +; Test basic code generation for i64 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. 
+set is_64bit=1 +isa intel haswell + +function %i64_load(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.i64 v2 + return v3 +} + +function %i64_store(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} + +function %i64_load8_s(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload8.i64 v2 + return v3 +} + +function %i64_load8_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload8.i64 v2 + return v3 +} + +function %i64_store8(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore8 v0, v3 + return +} + +function %i64_load16_s(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload16.i64 v2 + return v3 +} + +function %i64_load16_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload16.i64 v2 + return v3 +} + +function %i64_store16(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore16 v0, v3 + return +} + +function %i64_load32_s(i32, 
i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload32.i64 v2 + return v3 +} + +function %i64_load32_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload32.i64 v2 + return v3 +} + +function %i64_store32(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, guard 0x8000_0000 + +ebb0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore32 v0, v3 + return +} diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index ffc6d28458..7ac5f80d49 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -223,6 +223,26 @@ enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe) enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe) enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe) +# +# Float loads and stores. 
+# + +enc_flt(base.load.f32.any, r.fld, 0x66, 0x0f, 0x6e) +enc_flt(base.load.f32.any, r.fldDisp8, 0x66, 0x0f, 0x6e) +enc_flt(base.load.f32.any, r.fldDisp32, 0x66, 0x0f, 0x6e) + +enc_flt(base.load.f64.any, r.fld, 0xf3, 0x0f, 0x7e) +enc_flt(base.load.f64.any, r.fldDisp8, 0xf3, 0x0f, 0x7e) +enc_flt(base.load.f64.any, r.fldDisp32, 0xf3, 0x0f, 0x7e) + +enc_flt(base.store.f32.any, r.fst, 0x66, 0x0f, 0x7e) +enc_flt(base.store.f32.any, r.fstDisp8, 0x66, 0x0f, 0x7e) +enc_flt(base.store.f32.any, r.fstDisp32, 0x66, 0x0f, 0x7e) + +enc_flt(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6) +enc_flt(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6) +enc_flt(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6) + # # Call/return # diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index b45091d837..8736ca5371 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -374,6 +374,15 @@ st_abcd = TailRecipe( modrm_rm(in_reg1, in_reg0, sink); ''') +# XX /r register-indirect store of FPR with no offset. +fst = TailRecipe( + 'fst', Store, size=1, ins=(FPR, GPR), outs=(), + instp=IsEqual(Store.offset, 0), + emit=''' + PUT_OP(bits, rex2(in_reg1, in_reg0), sink); + modrm_rm(in_reg1, in_reg0, sink); + ''') + # XX /r register-indirect store with 8-bit offset. stDisp8 = TailRecipe( 'stDisp8', Store, size=2, ins=(GPR, GPR), outs=(), @@ -393,6 +402,15 @@ stDisp8_abcd = TailRecipe( let offset: i32 = offset.into(); sink.put1(offset as u8); ''') +fstDisp8 = TailRecipe( + 'fstDisp8', Store, size=2, ins=(FPR, GPR), outs=(), + instp=IsSignedInt(Store.offset, 8), + emit=''' + PUT_OP(bits, rex2(in_reg1, in_reg0), sink); + modrm_disp8(in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') # XX /r register-indirect store with 32-bit offset. 
stDisp32 = TailRecipe( @@ -411,6 +429,14 @@ stDisp32_abcd = TailRecipe( let offset: i32 = offset.into(); sink.put4(offset as u32); ''') +fstDisp32 = TailRecipe( + 'fstDisp32', Store, size=5, ins=(FPR, GPR), outs=(), + emit=''' + PUT_OP(bits, rex2(in_reg1, in_reg0), sink); + modrm_disp32(in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') # # Load recipes @@ -425,6 +451,15 @@ ld = TailRecipe( modrm_rm(in_reg0, out_reg0, sink); ''') +# XX /r float load with no offset. +fld = TailRecipe( + 'fld', Load, size=1, ins=(GPR), outs=(FPR), + instp=IsEqual(Load.offset, 0), + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_rm(in_reg0, out_reg0, sink); + ''') + # XX /r load with 8-bit offset. ldDisp8 = TailRecipe( 'ldDisp8', Load, size=2, ins=(GPR), outs=(GPR), @@ -436,6 +471,17 @@ ldDisp8 = TailRecipe( sink.put1(offset as u8); ''') +# XX /r float load with 8-bit offset. +fldDisp8 = TailRecipe( + 'fldDisp8', Load, size=2, ins=(GPR), outs=(FPR), + instp=IsSignedInt(Load.offset, 8), + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_disp8(in_reg0, out_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + # XX /r load with 32-bit offset. ldDisp32 = TailRecipe( 'ldDisp32', Load, size=5, ins=(GPR), outs=(GPR), @@ -447,6 +493,17 @@ ldDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# XX /r float load with 32-bit offset. +fldDisp32 = TailRecipe( + 'fldDisp32', Load, size=5, ins=(GPR), outs=(FPR), + instp=IsSignedInt(Load.offset, 32), + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_disp32(in_reg0, out_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + # # Call/return #