diff --git a/cranelift/docs/langref.rst b/cranelift/docs/langref.rst index 177d133ace..2758a1c7b8 100644 --- a/cranelift/docs/langref.rst +++ b/cranelift/docs/langref.rst @@ -476,6 +476,11 @@ these instructions is undefined. If it is addressable but not There are also more restricted operations for accessing specific types of memory objects. +Additionally, instructions are provided for handling multi-register addressing. + +.. autoinst:: load_complex +.. autoinst:: store_complex + Memory operation flags ---------------------- diff --git a/cranelift/filetests/isa/x86/binary32-float.cton b/cranelift/filetests/isa/x86/binary32-float.cton index 702972a241..7b1a07853a 100644 --- a/cranelift/filetests/isa/x86/binary32-float.cton +++ b/cranelift/filetests/isa/x86/binary32-float.cton @@ -227,6 +227,32 @@ ebb0: ; asm: ucomiss %xmm5, %xmm5 [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed + ; Load/Store Complex + + [-,%rax] v350 = iconst.i32 1 + [-,%rbx] v351 = iconst.i32 2 + + ; asm: movss (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 + ; asm: movss 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 + ; asm: movss -0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce + ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 + ; asm: movss -0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0 + ; asm: movss %xmm5,(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 + ; asm: movss %xmm5,0x32(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 + ; asm: movss %xmm2,-0x32(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce + ; asm: movss %xmm5,0x2710(%rax,%rbx,1) + [-] 
store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 + ; asm: movss %xmm2,-0x2710(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0 + return } diff --git a/cranelift/filetests/isa/x86/binary32.cton b/cranelift/filetests/isa/x86/binary32.cton index feeabaa637..5ce1b228c9 100644 --- a/cranelift/filetests/isa/x86/binary32.cton +++ b/cranelift/filetests/isa/x86/binary32.cton @@ -432,6 +432,37 @@ ebb0: ; asm: shrl $8, %esi [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08 + ; Load Complex + [-,%rax] v521 = iconst.i32 1 + [-,%rbx] v522 = iconst.i32 1 + ; asm: movl (%eax,%ebx,1), %ecx + [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 + ; asm: movl 1(%eax,%ebx,1), %ecx + [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 + ; asm: mov 0x1000(%eax,%ebx,1),%ecx + [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 + ; asm: movzbl (%eax,%ebx,1),%ecx + [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 + ; asm: movsbl (%eax,%ebx,1),%ecx + [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 + ; asm: movzwl (%eax,%ebx,1),%ecx + [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 + ; asm: movswl (%eax,%ebx,1),%ecx + [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 + + ; Store Complex + [-,%rcx] v601 = iconst.i32 1 + ; asm: mov %ecx,(%eax,%ebx,1) + store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 + ; asm: mov %ecx,0x1(%eax,%ebx,1) + store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 + ; asm: mov %ecx,0x1000(%eax,%ebx,1) + store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 + ; asm: mov %cx,(%eax,%ebx,1) + istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 + ; asm: mov %cl,(%eax,%ebx,1) + istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 + ; asm: testl %ecx, %ecx ; asm: je ebb1 brz v1, ebb1 ; 
bin: 85 c9 74 0e diff --git a/cranelift/filetests/isa/x86/binary64-float.cton b/cranelift/filetests/isa/x86/binary64-float.cton index fc38271485..2f17c75ea6 100644 --- a/cranelift/filetests/isa/x86/binary64-float.cton +++ b/cranelift/filetests/isa/x86/binary64-float.cton @@ -241,6 +241,34 @@ ebb0: ; asm: ucomiss %xmm5, %xmm5 [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed + + ; Load/Store Complex + + [-,%rax] v350 = iconst.i64 1 + [-,%rbx] v351 = iconst.i64 2 + + ; asm: movss (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 + ; asm: movss 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 + ; asm: movss -0x32(%rax,%rbx,1),%xmm10 + [-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce + ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 + ; asm: movss -0x2710(%rax,%rbx,1),%xmm10 + [-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0 + + ; asm: movss %xmm5, (%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 + ; asm: movss %xmm5, 50(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 + ; asm: movss %xmm10, -50(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce + ; asm: movss %xmm5, 10000(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 + ; asm: movss %xmm10, -10000(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0 + return } @@ -476,6 +504,32 @@ ebb0: ; asm: ucomisd %xmm5, %xmm5 [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed + ; Load/Store Complex + + [-,%rax] v350 = iconst.i64 1 + [-,%rbx] v351 = iconst.i64 2 + ; asm: movsd (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 
0f 10 2c 18 + ; asm: movsd 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32 + ; asm: movsd -0x32(%rax,%rbx,1),%xmm10 + [-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce + ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710 + ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10 + [-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0 + + ; asm: movsd %xmm5, (%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351 ; bin: heap_oob f2 0f 11 2c 18 + ; asm: movsd %xmm5, 50(%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32 + ; asm: movsd %xmm10, -50(%rax,%rbx,1) + [-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce + ; asm: movsd %xmm5, 10000(%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710 + ; asm: movsd %xmm10, -10000(%rax,%rbx,1) + [-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0 + return } diff --git a/cranelift/filetests/isa/x86/binary64.cton b/cranelift/filetests/isa/x86/binary64.cton index af625fa1bb..2432736e64 100644 --- a/cranelift/filetests/isa/x86/binary64.cton +++ b/cranelift/filetests/isa/x86/binary64.cton @@ -594,6 +594,80 @@ ebb0: [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f + ; Load Complex + [-,%rax] v521 = iconst.i64 1 + [-,%rbx] v522 = iconst.i64 1 + [-,%rdi] v523 = iconst.i32 1 + [-,%rsi] v524 = iconst.i32 1 + ; asm: movq (%rax,%rbx,1), %rcx + [-,%rcx] v525 = load_complex.i64 v521+v522 ; bin: heap_oob 48 8b 0c 18 + ; asm: movl (%rax,%rbx,1), %ecx + [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 + ; asm: movq 1(%rax,%rbx,1), %rcx + [-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01 + ; asm: movl 1(%rax,%rbx,1), %ecx + [-,%rcx] v528 = load_complex.i32 
v521+v522+1 ; bin: heap_oob 8b 4c 18 01 + ; asm: mov 0x1000(%rax,%rbx,1),%rcx + [-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000 + ; asm: mov 0x1000(%rax,%rbx,1),%ecx + [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 + ; asm: movzbq (%rax,%rbx,1),%rcx + [-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18 + ; asm: movzbl (%rax,%rbx,1),%ecx + [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 + ; asm: movsbq (%rax,%rbx,1),%rcx + [-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18 + ; asm: movsbl (%rax,%rbx,1),%ecx + [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 + ; asm: movzwq (%rax,%rbx,1),%rcx + [-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18 + ; asm: movzwl (%rax,%rbx,1),%ecx + [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 + ; asm: movswq (%rax,%rbx,1),%rcx + [-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18 + ; asm: movswl (%rax,%rbx,1),%ecx + [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 + ; asm: mov (%rax,%rbx,1),%ecx + [-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18 + ; asm: movslq (%rax,%rbx,1),%rcx + [-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18 + [-,%r13] v550 = iconst.i64 1 + [-,%r14] v551 = iconst.i64 1 + ; asm: mov 0x0(%r13,%r14,1),%r12d + [-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00 + + ; Store Complex + [-,%rcx] v600 = iconst.i64 1 + [-,%rcx] v601 = iconst.i32 1 + [-,%r10] v602 = iconst.i64 1 + [-,%r11] v603 = iconst.i32 1 + ; asm: mov %rcx,(%rax,%rbx,1) + store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18 + ; asm: mov %rcx,0x1(%rax,%rbx,1) + store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01 + ; asm: mov %rcx,0x1000(%rax,%rbx,1) + store_complex v600, v521+v522+0x1000 ; bin: 
heap_oob 48 89 8c 18 00001000 + ; asm: mov %ecx,(%rax,%rbx,1) + store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 + ; asm: mov %ecx,0x1(%rax,%rbx,1) + store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 + ; asm: mov %ecx,0x1000(%rax,%rbx,1) + store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 + ; asm: mov %ecx,(%rax,%rbx,1) + istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18 + ; asm: mov %cx,(%rax,%rbx,1) + istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18 + ; asm: mov %cx,(%rax,%rbx,1) + istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 + ; asm: mov %r10w,(%rax,%rbx,1) + istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18 + ; asm: mov %r11w,(%rax,%rbx,1) + istore16_complex v603, v521+v522 ; bin: heap_oob 66 44 89 1c 18 + ; asm: mov %cl,(%rax,%rbx,1) + istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18 + ; asm: mov %cl,(%rax,%rbx,1) + istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 + ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b diff --git a/cranelift/filetests/parser/tiny.cton b/cranelift/filetests/parser/tiny.cton index 2a342a666b..49628a27d6 100644 --- a/cranelift/filetests/parser/tiny.cton +++ b/cranelift/filetests/parser/tiny.cton @@ -158,9 +158,13 @@ ebb0(v1: i32): v6 = load.i64 aligned notrap v1 v7 = load.i64 v1-12 v8 = load.i64 notrap v1+0x1_0000 + v9 = load_complex.i64 v1+v2 + v10 = load_complex.i64 v1+v2+0x1 store v2, v1 store aligned v3, v1+12 store notrap aligned v3, v1-12 + store_complex v3, v1+v2 + store_complex v3, v1+v2+0x1 } ; sameln: function %memory(i32) fast { ; nextln: ebb0(v1: i32): @@ -171,9 +175,13 @@ ebb0(v1: i32): ; nextln: v6 = load.i64 notrap aligned v1 ; nextln: v7 = load.i64 v1-12 ; nextln: v8 = load.i64 notrap v1+0x0001_0000 +; nextln: v9 = load_complex.i64 v1+v2 +; nextln: v10 = load_complex.i64 v1+v2+1 ; nextln: store v2, v1 ; nextln: store aligned v3, v1+12 ; nextln: store notrap aligned v3, v1-12 +; nextln: 
store_complex v3, v1+v2 +; nextln: store_complex v3, v1+v2+1 ; Register diversions. ; This test file has no ISA, so we can unly use register unit numbers. diff --git a/cranelift/filetests/postopt/complex_memory_ops.cton b/cranelift/filetests/postopt/complex_memory_ops.cton new file mode 100644 index 0000000000..8f12e61577 --- /dev/null +++ b/cranelift/filetests/postopt/complex_memory_ops.cton @@ -0,0 +1,95 @@ +test postopt +set is_64bit +isa x86 + +function %dual_loads(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): +[RexOp1rr#8001] v3 = iadd v0, v1 + v4 = load.i64 v3 + v5 = uload8.i64 v3 + v6 = sload8.i64 v3 + v7 = uload16.i64 v3 + v8 = sload16.i64 v3 + v9 = uload32.i64 v3 + v10 = sload32.i64 v3 +[Op1ret#c3] return v10 +} + +; sameln: function %dual_loads +; nextln: ebb0(v0: i64, v1: i64): +; nextln: v3 = iadd v0, v1 +; nextln: v4 = load_complex.i64 v0+v1 +; nextln: v5 = uload8_complex.i64 v0+v1 +; nextln: v6 = sload8_complex.i64 v0+v1 +; nextln: v7 = uload16_complex.i64 v0+v1 +; nextln: v8 = sload16_complex.i64 v0+v1 +; nextln: v9 = uload32_complex v0+v1 +; nextln: v10 = sload32_complex v0+v1 +; nextln: return v10 +; nextln: } + +function %dual_loads2(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): +[RexOp1rr#8001] v3 = iadd v0, v1 + v4 = load.i64 v3+1 + v5 = uload8.i64 v3+1 + v6 = sload8.i64 v3+1 + v7 = uload16.i64 v3+1 + v8 = sload16.i64 v3+1 + v9 = uload32.i64 v3+1 + v10 = sload32.i64 v3+1 +[Op1ret#c3] return v10 +} + +; sameln: function %dual_loads2 +; nextln: ebb0(v0: i64, v1: i64): +; nextln: v3 = iadd v0, v1 +; nextln: v4 = load_complex.i64 v0+v1+1 +; nextln: v5 = uload8_complex.i64 v0+v1+1 +; nextln: v6 = sload8_complex.i64 v0+v1+1 +; nextln: v7 = uload16_complex.i64 v0+v1+1 +; nextln: v8 = sload16_complex.i64 v0+v1+1 +; nextln: v9 = uload32_complex v0+v1+1 +; nextln: v10 = sload32_complex v0+v1+1 +; nextln: return v10 +; nextln: } + +function %dual_stores(i64, i64, i64) { +ebb0(v0: i64, v1: i64, v2: i64): +[RexOp1rr#8001] v3 = iadd v0, v1 +[RexOp1st#8089] 
store.i64 v2, v3 +[RexOp1st#88] istore8.i64 v2, v3 +[RexMp1st#189] istore16.i64 v2, v3 +[RexOp1st#89] istore32.i64 v2, v3 +[Op1ret#c3] return +} + +; sameln: function %dual_stores +; nextln: ebb0(v0: i64, v1: i64, v2: i64): +; nextln: v3 = iadd v0, v1 +; nextln: store_complex v2, v0+v1 +; nextln: istore8_complex v2, v0+v1 +; nextln: istore16_complex v2, v0+v1 +; nextln: istore32_complex v2, v0+v1 +; nextln: return +; nextln: } + +function %dual_stores2(i64, i64, i64) { +ebb0(v0: i64, v1: i64, v2: i64): +[RexOp1rr#8001] v3 = iadd v0, v1 +[RexOp1stDisp8#8089] store.i64 v2, v3+1 +[RexOp1stDisp8#88] istore8.i64 v2, v3+1 +[RexMp1stDisp8#189] istore16.i64 v2, v3+1 +[RexOp1stDisp8#89] istore32.i64 v2, v3+1 +[Op1ret#c3] return +} + +; sameln: function %dual_stores2 +; nextln: ebb0(v0: i64, v1: i64, v2: i64): +; nextln: v3 = iadd v0, v1 +; nextln: store_complex v2, v0+v1+1 +; nextln: istore8_complex v2, v0+v1+1 +; nextln: istore16_complex v2, v0+v1+1 +; nextln: istore32_complex v2, v0+v1+1 +; nextln: return +; nextln: } diff --git a/lib/codegen/meta/base/formats.py b/lib/codegen/meta/base/formats.py index 89ef30881f..817ac0a87d 100644 --- a/lib/codegen/meta/base/formats.py +++ b/lib/codegen/meta/base/formats.py @@ -57,7 +57,9 @@ CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS) FuncAddr = InstructionFormat(func_ref) Load = InstructionFormat(memflags, VALUE, offset32) +LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32) Store = InstructionFormat(memflags, VALUE, VALUE, offset32) +StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32) StackLoad = InstructionFormat(stack_slot, offset32) StackStore = InstructionFormat(VALUE, stack_slot, offset32) diff --git a/lib/codegen/meta/base/instructions.py b/lib/codegen/meta/base/instructions.py index 8fee97ae42..3107c9ca41 100644 --- a/lib/codegen/meta/base/instructions.py +++ b/lib/codegen/meta/base/instructions.py @@ -246,6 +246,7 @@ x = Operand('x', Mem, doc='Value to be stored') a = 
Operand('a', Mem, doc='Value loaded') p = Operand('p', iAddr) Flags = Operand('Flags', memflags) +args = Operand('args', VARIABLE_ARGS, doc='Address arguments') load = Instruction( 'load', r""" @@ -256,6 +257,15 @@ load = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +load_complex = Instruction( + 'load_complex', r""" + Load from memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + store = Instruction( 'store', r""" Store ``x`` to memory at ``p + Offset``. @@ -265,6 +275,16 @@ store = Instruction( """, ins=(Flags, x, p, Offset), can_store=True) +store_complex = Instruction( + 'store_complex', r""" + Store ``x`` to memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + """, + ins=(Flags, x, args, Offset), can_store=True) + + iExt8 = TypeVar( 'iExt8', 'An integer type with more than 8 bits', ints=(16, 64)) @@ -279,6 +299,14 @@ uload8 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +uload8_complex = Instruction( + 'uload8_complex', r""" + Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + sload8 = Instruction( 'sload8', r""" Load 8 bits from memory at ``p + Offset`` and sign-extend. @@ -287,6 +315,14 @@ sload8 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +sload8_complex = Instruction( + 'sload8_complex', r""" + Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + istore8 = Instruction( 'istore8', r""" Store the low 8 bits of ``x`` to memory at ``p + Offset``. 
@@ -295,6 +331,14 @@ istore8 = Instruction( """, ins=(Flags, x, p, Offset), can_store=True) +istore8_complex = Instruction( + 'istore8_complex', r""" + Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. + """, + ins=(Flags, x, args, Offset), can_store=True) + iExt16 = TypeVar( 'iExt16', 'An integer type with more than 16 bits', ints=(32, 64)) @@ -309,6 +353,14 @@ uload16 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +uload16_complex = Instruction( + 'uload16_complex', r""" + Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + sload16 = Instruction( 'sload16', r""" Load 16 bits from memory at ``p + Offset`` and sign-extend. @@ -317,6 +369,14 @@ sload16 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +sload16_complex = Instruction( + 'sload16_complex', r""" + Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + istore16 = Instruction( 'istore16', r""" Store the low 16 bits of ``x`` to memory at ``p + Offset``. @@ -325,6 +385,14 @@ istore16 = Instruction( """, ins=(Flags, x, p, Offset), can_store=True) +istore16_complex = Instruction( + 'istore16_complex', r""" + Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + """, + ins=(Flags, x, args, Offset), can_store=True) + iExt32 = TypeVar( 'iExt32', 'An integer type with more than 32 bits', ints=(64, 64)) @@ -339,6 +407,14 @@ uload32 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +uload32_complex = Instruction( + 'uload32_complex', r""" + Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend. 
+ + This is equivalent to ``load.i32`` followed by ``uextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + sload32 = Instruction( 'sload32', r""" Load 32 bits from memory at ``p + Offset`` and sign-extend. @@ -347,6 +423,14 @@ sload32 = Instruction( """, ins=(Flags, p, Offset), outs=a, can_load=True) +sload32_complex = Instruction( + 'sload32_complex', r""" + Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. + """, + ins=(Flags, args, Offset), outs=a, can_load=True) + istore32 = Instruction( 'istore32', r""" Store the low 32 bits of ``x`` to memory at ``p + Offset``. @@ -355,6 +439,14 @@ istore32 = Instruction( """, ins=(Flags, x, p, Offset), can_store=True) +istore32_complex = Instruction( + 'istore32_complex', r""" + Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + """, + ins=(Flags, x, args, Offset), can_store=True) + x = Operand('x', Mem, doc='Value to be stored') a = Operand('a', Mem, doc='Value loaded') Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot') diff --git a/lib/codegen/meta/base/predicates.py b/lib/codegen/meta/base/predicates.py index 1a6b4c2c75..44f135e0d3 100644 --- a/lib/codegen/meta/base/predicates.py +++ b/lib/codegen/meta/base/predicates.py @@ -2,12 +2,12 @@ Cretonne predicates that consider `Function` fields. 
""" from cdsl.predicates import FieldPredicate -from .formats import UnaryGlobalVar +from .formats import UnaryGlobalVar, InstructionFormat try: from typing import TYPE_CHECKING if TYPE_CHECKING: - from cdsl.formats import FormatField # noqa + from cdsl.formats import InstructionFormat, FormatField # noqa except ImportError: pass @@ -33,3 +33,10 @@ class IsColocatedData(FieldPredicate): # type: () -> None super(IsColocatedData, self).__init__( UnaryGlobalVar.global_var, 'is_colocated_data', ('func',)) + + +class LengthEquals(FieldPredicate): + def __init__(self, iform, num): + # type: (InstructionFormat, int) -> None + super(LengthEquals, self).__init__( + iform.args(), 'has_length_of', (num, 'func')) diff --git a/lib/codegen/meta/cdsl/formats.py b/lib/codegen/meta/cdsl/formats.py index aba83ed7a2..c8dd58fc7f 100644 --- a/lib/codegen/meta/cdsl/formats.py +++ b/lib/codegen/meta/cdsl/formats.py @@ -103,6 +103,19 @@ class InstructionFormat(object): InstructionFormat._registry[sig] = self InstructionFormat.all_formats.append(self) + def args(self): + # type: () -> FormatField + """ + Provides a ValueListField, which is derived from FormatField, + corresponding to the full ValueList of the instruction format. This + is useful for creating predicates for instructions which use variadic + arguments. + """ + + if self.has_value_list: + return ValueListField(self) + return None + def _process_member_names(self, kinds): # type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField] # noqa """ @@ -210,7 +223,7 @@ class FormatField(object): This corresponds to a single member of a variant of the `InstructionData` data type. - :param iformat: Parent `InstructionFormat`. + :param iform: Parent `InstructionFormat`. :param immnum: Immediate operand number in parent. :param kind: Immediate Operand kind. :param member: Member name in `InstructionData` variant. 
@@ -227,6 +240,29 @@ class FormatField(object): # type: () -> str return '{}.{}'.format(self.format.name, self.member) + def rust_destructuring_name(self): + # type: () -> str + return self.member + def rust_name(self): # type: () -> str return self.member + + +class ValueListField(FormatField): + """ + The full value list field of an instruction format. + + This corresponds to all Value-type members of a variant of the + `InstructionData` format, which contains a ValueList. + + :param iform: Parent `InstructionFormat`. + """ + def __init__(self, iform): + # type: (InstructionFormat) -> None + self.format = iform + self.member = "args" + + def rust_destructuring_name(self): + # type: () -> str + return 'ref {}'.format(self.member) diff --git a/lib/codegen/meta/cdsl/instructions.py b/lib/codegen/meta/cdsl/instructions.py index 3ef95c9cf9..f972f82bc7 100644 --- a/lib/codegen/meta/cdsl/instructions.py +++ b/lib/codegen/meta/cdsl/instructions.py @@ -201,9 +201,10 @@ class Instruction(object): # Prefer to use the typevar_operand to infer the controlling typevar. 
self.use_typevar_operand = False typevar_error = None - if self.format.typevar_operand is not None: + tv_op = self.format.typevar_operand + if tv_op is not None and tv_op < len(self.value_opnums): try: - opnum = self.value_opnums[self.format.typevar_operand] + opnum = self.value_opnums[tv_op] tv = self.ins[opnum].typevar if tv is tv.free_typevar() or tv.singleton_type() is not None: self.other_typevars = self._verify_ctrl_typevar(tv) diff --git a/lib/codegen/meta/gen_binemit.py b/lib/codegen/meta/gen_binemit.py index 5888d89e10..c813d12977 100644 --- a/lib/codegen/meta/gen_binemit.py +++ b/lib/codegen/meta/gen_binemit.py @@ -27,7 +27,7 @@ def gen_recipe(recipe, fmt): nvops = iform.num_value_operands want_args = any(isinstance(i, RegClass) or isinstance(i, Stack) for i in recipe.ins) - assert not want_args or nvops > 0 + assert not want_args or nvops > 0 or iform.has_value_list want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack) for o in recipe.outs) diff --git a/lib/codegen/meta/gen_encoding.py b/lib/codegen/meta/gen_encoding.py index e3915100a2..a81b66dcca 100644 --- a/lib/codegen/meta/gen_encoding.py +++ b/lib/codegen/meta/gen_encoding.py @@ -103,7 +103,7 @@ def emit_instp(instp, fmt, has_func=False): fnames = set() # type: Set[str] for p in leafs: if isinstance(p, FieldPredicate): - fnames.add(p.field.rust_name()) + fnames.add(p.field.rust_destructuring_name()) else: assert isinstance(p, TypePredicate) has_type_check = True diff --git a/lib/codegen/meta/isa/x86/encodings.py b/lib/codegen/meta/isa/x86/encodings.py index 2284a0d70f..1a9260a28d 100644 --- a/lib/codegen/meta/isa/x86/encodings.py +++ b/lib/codegen/meta/isa/x86/encodings.py @@ -3,9 +3,9 @@ x86 Encodings. 
""" from __future__ import absolute_import from cdsl.predicates import IsUnsignedInt, Not, And -from base.predicates import IsColocatedFunc, IsColocatedData +from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals from base import instructions as base -from base.formats import UnaryImm, FuncAddr, Call +from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex from .defs import X86_64, X86_32 from . import recipes as r from . import settings as cfg @@ -19,6 +19,7 @@ try: from typing import TYPE_CHECKING, Any # noqa if TYPE_CHECKING: from cdsl.instructions import MaybeBoundInst # noqa + from cdsl.predicates import FieldPredicate # noqa except ImportError: pass @@ -54,6 +55,15 @@ def enc_x86_64(inst, recipe, *args, **kwargs): X86_64.enc(inst, *recipe(*args, **kwargs)) +def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs): + # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None + """ + Add encodings for `inst` to X86_64 with and without a REX prefix. + """ + X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp) + X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp) + + def enc_both(inst, recipe, *args, **kwargs): # type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None """ @@ -63,6 +73,15 @@ def enc_both(inst, recipe, *args, **kwargs): enc_x86_64(inst, recipe, *args, **kwargs) +def enc_both_instp(inst, recipe, instp, *args, **kwargs): + # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None + """ + Add encodings for `inst` to both X86_32 and X86_64. 
+ """ + X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp) + enc_x86_64_instp(inst, recipe, instp, *args, **kwargs) + + def enc_i32_i64(inst, recipe, *args, **kwargs): # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None """ @@ -80,6 +99,25 @@ def enc_i32_i64(inst, recipe, *args, **kwargs): X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs)) +def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs): + # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None + """ + Add encodings for `inst.i32` to X86_32. + Add encodings for `inst.i32` to X86_64 with and without REX. + Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + + Similar to `enc_i32_i64` but applies `instp` to each encoding. + """ + X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp) + + # REX-less encoding must come after REX encoding so we don't use it by + # default. Otherwise reg-alloc would never use r8 and up. + X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp) + X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp) + + X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp) + + def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs): # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None """ @@ -212,6 +250,31 @@ X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) # # Loads and stores. 
# + +ldcomplexp = LengthEquals(LoadComplex, 2) +for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]: + enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b) + enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b) + X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1), + instp=ldcomplexp) + enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7) + enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf) + enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6) + enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe) + +stcomplexp = LengthEquals(StoreComplex, 3) +for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]: + enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89) + enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89) + enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89) + enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89) + +for recipe in [r.stWithIndex_abcd, + r.stWithIndexDisp8_abcd, + r.stWithIndexDisp32_abcd]: + enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88) + enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88) + for recipe in [r.st, r.stDisp8, r.stDisp32]: enc_i32_i64_ld_st(base.store, True, recipe, 0x89) enc_x86_64(base.istore32.i64.any, recipe, 0x89) @@ -286,18 +349,34 @@ enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10) enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10) enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10) +enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10) +enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10) +enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10) + enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10) enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10) enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10) 
+enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10) +enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10) +enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10) + enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11) enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11) enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11) +enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11) +enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11) +enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11) + enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11) enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11) enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11) +enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11) +enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11) +enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11) + enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10) enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10) enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10) diff --git a/lib/codegen/meta/isa/x86/recipes.py b/lib/codegen/meta/isa/x86/recipes.py index ccd85c0fef..d1f4bac0c2 100644 --- a/lib/codegen/meta/isa/x86/recipes.py +++ b/lib/codegen/meta/isa/x86/recipes.py @@ -14,6 +14,7 @@ from base.formats import IntSelect, IntCondTrap, FloatCondTrap from base.formats import Jump, Branch, BranchInt, BranchFloat from base.formats import Ternary, FuncAddr, UnaryGlobalVar from base.formats import RegMove, RegSpill, RegFill, CopySpecial +from base.formats import LoadComplex, StoreComplex from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG from .registers import StackGPR32, StackFPR32 @@ -739,6 +740,22 @@ st = TailRecipe( modrm_rm(in_reg1, in_reg0, sink); ''') +# XX /r register-indirect 
store with index and no offset. +stWithIndex = TailRecipe( + 'stWithIndex', StoreComplex, size=2, + ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + outs=(), + instp=IsEqual(StoreComplex.offset, 0), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + ''') + # XX /r register-indirect store with no offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. st_abcd = TailRecipe( @@ -754,6 +771,23 @@ st_abcd = TailRecipe( modrm_rm(in_reg1, in_reg0, sink); ''') +# XX /r register-indirect store with index and no offset. +# Only ABCD allowed for stored value. This is for byte stores with no REX. +stWithIndex_abcd = TailRecipe( + 'stWithIndex_abcd', StoreComplex, size=2, + ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + outs=(), + instp=IsEqual(StoreComplex.offset, 0), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + ''') + # XX /r register-indirect store of FPR with no offset. fst = TailRecipe( 'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(), @@ -766,6 +800,20 @@ fst = TailRecipe( PUT_OP(bits, rex2(in_reg1, in_reg0), sink); modrm_rm(in_reg1, in_reg0, sink); ''') +# XX /r register-indirect store with index and no offset of FPR. 
+fstWithIndex = TailRecipe( + 'fstWithIndex', StoreComplex, size=2, + ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(), + instp=IsEqual(StoreComplex.offset, 0), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + ''') # XX /r register-indirect store with 8-bit offset. stDisp8 = TailRecipe( @@ -781,6 +829,27 @@ stDisp8 = TailRecipe( let offset: i32 = offset.into(); sink.put1(offset as u8); ''') + +# XX /r register-indirect store with index and 8-bit offset. +stWithIndexDisp8 = TailRecipe( + 'stWithIndexDisp8', StoreComplex, size=3, + ins=(GPR, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 8), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + +# XX /r register-indirect store with 8-bit offset. +# Only ABCD allowed for stored value. This is for byte stores with no REX. stDisp8_abcd = TailRecipe( 'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(), instp=IsSignedInt(Store.offset, 8), @@ -795,6 +864,27 @@ stDisp8_abcd = TailRecipe( let offset: i32 = offset.into(); sink.put1(offset as u8); ''') + +# XX /r register-indirect store with index and 8-bit offset. +# Only ABCD allowed for stored value. This is for byte stores with no REX. 
+stWithIndexDisp8_abcd = TailRecipe( + 'stWithIndexDisp8_abcd', StoreComplex, size=3, + ins=(ABCD, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 8), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + +# XX /r register-indirect store with 8-bit offset of FPR. fstDisp8 = TailRecipe( 'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(), instp=IsSignedInt(Store.offset, 8), @@ -809,6 +899,24 @@ fstDisp8 = TailRecipe( sink.put1(offset as u8); ''') +# XX /r register-indirect store with index and 8-bit offset of FPR. +fstWithIndexDisp8 = TailRecipe( + 'fstWithIndexDisp8', StoreComplex, size=3, + ins=(FPR, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 8), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + # XX /r register-indirect store with 32-bit offset. stDisp32 = TailRecipe( 'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(), @@ -822,6 +930,27 @@ stDisp32 = TailRecipe( let offset: i32 = offset.into(); sink.put4(offset as u32); ''') + +# XX /r register-indirect store with index and 32-bit offset. 
+stWithIndexDisp32 = TailRecipe( + 'stWithIndexDisp32', StoreComplex, size=6, + ins=(GPR, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 32), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + +# XX /r register-indirect store with 32-bit offset. +# Only ABCD allowed for stored value. This is for byte stores with no REX. stDisp32_abcd = TailRecipe( 'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(), when_prefixed=stDisp32, @@ -835,6 +964,27 @@ stDisp32_abcd = TailRecipe( let offset: i32 = offset.into(); sink.put4(offset as u32); ''') + +# XX /r register-indirect store with index and 32-bit offset. +# Only ABCD allowed for stored value. This is for byte stores with no REX. +stWithIndexDisp32_abcd = TailRecipe( + 'stWithIndexDisp32_abcd', StoreComplex, size=6, + ins=(ABCD, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 32), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + +# XX /r register-indirect store with 32-bit offset of FPR. fstDisp32 = TailRecipe( 'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(), clobbers_flags=False, @@ -848,6 +998,24 @@ fstDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# XX /r register-indirect store with index and 32-bit offset of FPR. 
+fstWithIndexDisp32 = TailRecipe( + 'fstWithIndexDisp32', StoreComplex, size=6, + ins=(FPR, GPR, GPR_DEREF_SAFE), + outs=(), + instp=IsSignedInt(StoreComplex.offset, 32), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + # Unary spill with SIB and 32-bit displacement. spillSib32 = TailRecipe( 'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32, @@ -919,6 +1087,22 @@ ld = TailRecipe( modrm_rm(in_reg0, out_reg0, sink); ''') +# XX /r load with index and no offset. +ldWithIndex = TailRecipe( + 'ldWithIndex', LoadComplex, size=2, + ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + outs=(GPR), + instp=IsEqual(LoadComplex.offset, 0), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + ''') + # XX /r float load with no offset. fld = TailRecipe( 'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR), @@ -932,6 +1116,22 @@ fld = TailRecipe( modrm_rm(in_reg0, out_reg0, sink); ''') +# XX /r float load with index and no offset. +fldWithIndex = TailRecipe( + 'fldWithIndex', LoadComplex, size=2, + ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + outs=(FPR), + instp=IsEqual(LoadComplex.offset, 0), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + ''') + # XX /r load with 8-bit offset. 
ldDisp8 = TailRecipe( 'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR), @@ -947,6 +1147,24 @@ ldDisp8 = TailRecipe( sink.put1(offset as u8); ''') +# XX /r load with index and 8-bit offset. +ldWithIndexDisp8 = TailRecipe( + 'ldWithIndexDisp8', LoadComplex, size=3, + ins=(GPR, GPR_DEREF_SAFE), + outs=(GPR), + instp=IsSignedInt(LoadComplex.offset, 8), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + # XX /r float load with 8-bit offset. fldDisp8 = TailRecipe( 'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR), @@ -962,6 +1180,24 @@ fldDisp8 = TailRecipe( sink.put1(offset as u8); ''') +# XX /r float load with index and 8-bit offset. +fldWithIndexDisp8 = TailRecipe( + 'fldWithIndexDisp8', LoadComplex, size=3, + ins=(GPR, GPR_DEREF_SAFE), + outs=(FPR), + instp=IsSignedInt(LoadComplex.offset, 8), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + ''') + # XX /r load with 32-bit offset. ldDisp32 = TailRecipe( 'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR), @@ -977,6 +1213,24 @@ ldDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# XX /r load with index and 32-bit offset. 
+ldWithIndexDisp32 = TailRecipe( + 'ldWithIndexDisp32', LoadComplex, size=6, + ins=(GPR, GPR_DEREF_SAFE), + outs=(GPR), + instp=IsSignedInt(LoadComplex.offset, 32), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + # XX /r float load with 32-bit offset. fldDisp32 = TailRecipe( 'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR), @@ -992,6 +1246,24 @@ fldDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# XX /r float load with index and 32-bit offset. +fldWithIndexDisp32 = TailRecipe( + 'fldWithIndexDisp32', LoadComplex, size=6, + ins=(GPR, GPR_DEREF_SAFE), + outs=(FPR), + instp=IsSignedInt(LoadComplex.offset, 32), + clobbers_flags=False, + emit=''' + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + ''') + # Unary fill with SIB and 32-bit displacement. fillSib32 = TailRecipe( 'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR, diff --git a/lib/codegen/src/isa/mod.rs b/lib/codegen/src/isa/mod.rs index d94eb37830..7b21e68d28 100644 --- a/lib/codegen/src/isa/mod.rs +++ b/lib/codegen/src/isa/mod.rs @@ -162,6 +162,11 @@ pub trait TargetIsa: fmt::Display { false } + /// Does the CPU implement multi-register addressing? + fn uses_complex_addresses(&self) -> bool { + false + } + /// Get a data structure describing the registers in this ISA. 
fn register_info(&self) -> RegInfo; diff --git a/lib/codegen/src/isa/x86/binemit.rs b/lib/codegen/src/isa/x86/binemit.rs index 3a7c14d7be..6b9c709bf2 100644 --- a/lib/codegen/src/isa/x86/binemit.rs +++ b/lib/codegen/src/isa/x86/binemit.rs @@ -46,6 +46,18 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { BASE_REX | b | (r << 2) } +// Create a three-register REX prefix, setting: +// +// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. +// REX.R = bit 3 of reg register. +// REX.X = bit 3 of SIB index register. +fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 { + let b = ((rm >> 3) & 1) as u8; + let r = ((reg >> 3) & 1) as u8; + let x = ((index >> 3) & 1) as u8; + BASE_REX | b | (x << 1) | (r << 2) +} + // Emit a REX prefix. // // The R, X, and B bits are computed from registers using the functions above. The W bit is @@ -211,7 +223,19 @@ fn modrm_disp32(rm: RegUnit, reg: RegUnit, sink: &mut CS) sink.put1(b); } -/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present. +/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present. +fn modrm_sib(reg: RegUnit, sink: &mut CS) { + modrm_rm(0b100, reg, sink); +} + +/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit +/// displacement are present. +fn modrm_sib_disp8(reg: RegUnit, sink: &mut CS) { + modrm_disp8(0b100, reg, sink); +} + +/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit +/// displacement are present. fn modrm_sib_disp32(reg: RegUnit, sink: &mut CS) { modrm_disp32(0b100, reg, sink); } @@ -225,6 +249,16 @@ fn sib_noindex(base: RegUnit, sink: &mut CS) { sink.put1(b); } +fn sib(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) { + // SIB SS_III_BBB. 
+ debug_assert_eq!(scale & !0x03, 0, "Scale out of range"); + let scale = scale & 3; + let index = index as u8 & 7; + let base = base as u8 & 7; + let b: u8 = (scale << 6) | (index << 3) | base; + sink.put1(b); +} + /// Get the low 4 bits of an opcode for an integer condition code. /// /// Add this offset to a base opcode for: diff --git a/lib/codegen/src/isa/x86/mod.rs b/lib/codegen/src/isa/x86/mod.rs index 787ec4a992..335d95cd22 100644 --- a/lib/codegen/src/isa/x86/mod.rs +++ b/lib/codegen/src/isa/x86/mod.rs @@ -62,6 +62,10 @@ impl TargetIsa for Isa { true } + fn uses_complex_addresses(&self) -> bool { + true + } + fn register_info(&self) -> RegInfo { registers::INFO.clone() } diff --git a/lib/codegen/src/postopt.rs b/lib/codegen/src/postopt.rs index 18f467a68a..6de918d08d 100644 --- a/lib/codegen/src/postopt.rs +++ b/lib/codegen/src/postopt.rs @@ -5,9 +5,9 @@ use cursor::{Cursor, EncCursor}; use ir::condcodes::{CondCode, FloatCC, IntCC}; use ir::dfg::ValueDef; -use ir::immediates::Imm64; +use ir::immediates::{Imm64, Offset32}; use ir::instructions::{Opcode, ValueList}; -use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value}; +use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value, Type, MemFlags}; use isa::TargetIsa; use timing; @@ -173,6 +173,158 @@ fn optimize_cpu_flags( pos.func.update_encoding(info.br_inst, isa).is_ok(); } + +struct MemOpInfo { + opcode: Opcode, + inst: Inst, + itype: Type, + arg: Value, + st_arg: Option, + flags: MemFlags, + offset: Offset32, + add_args: Option<[Value; 2]>, +} + +fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) { + let mut info = match pos.func.dfg[inst] { + InstructionData::Load { + opcode, + arg, + flags, + offset, + } => MemOpInfo { + opcode: opcode, + inst: inst, + itype: pos.func.dfg.ctrl_typevar(inst), + arg: arg, + st_arg: None, + flags: flags, + offset: offset, + add_args: None, + }, + InstructionData::Store { + opcode, + args, + flags, + offset, + } => MemOpInfo 
{ + opcode: opcode, + inst: inst, + itype: pos.func.dfg.ctrl_typevar(inst), + arg: args[1], + st_arg: Some(args[0]), + flags: flags, + offset: offset, + add_args: None, + }, + _ => return, + }; + + if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) { + match pos.func.dfg[result_inst] { + InstructionData::Binary { opcode, args } if opcode == Opcode::Iadd => { + info.add_args = Some(args.clone()); + } + _ => return, + } + } else { + return; + } + + match info.opcode { + Opcode::Load => { + pos.func.dfg.replace(info.inst).load_complex( + info.itype, + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Uload8 => { + pos.func.dfg.replace(info.inst).uload8_complex( + info.itype, + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Sload8 => { + pos.func.dfg.replace(info.inst).sload8_complex( + info.itype, + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Uload16 => { + pos.func.dfg.replace(info.inst).uload16_complex( + info.itype, + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Sload16 => { + pos.func.dfg.replace(info.inst).sload16_complex( + info.itype, + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Uload32 => { + pos.func.dfg.replace(info.inst).uload32_complex( + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Sload32 => { + pos.func.dfg.replace(info.inst).sload32_complex( + info.flags, + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Store => { + pos.func.dfg.replace(info.inst).store_complex( + info.flags, + info.st_arg.unwrap(), + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Istore8 => { + pos.func.dfg.replace(info.inst).istore8_complex( + info.flags, + info.st_arg.unwrap(), + &info.add_args.unwrap(), + info.offset, + ); + } + Opcode::Istore16 => { + pos.func.dfg.replace(info.inst).istore16_complex( + info.flags, + info.st_arg.unwrap(), + &info.add_args.unwrap(), + 
info.offset, + ); + } + Opcode::Istore32 => { + pos.func.dfg.replace(info.inst).istore32_complex( + info.flags, + info.st_arg.unwrap(), + &info.add_args.unwrap(), + info.offset, + ); + } + _ => return, + } + pos.func.update_encoding(info.inst, isa).is_ok(); +} + + + //---------------------------------------------------------------------- // // The main post-opt pass. @@ -198,6 +350,10 @@ pub fn do_postopt(func: &mut Function, isa: &TargetIsa) { } } } + + if isa.uses_complex_addresses() { + optimize_complex_addresses(&mut pos, inst, isa); + } } } } diff --git a/lib/codegen/src/predicates.rs b/lib/codegen/src/predicates.rs index e35e7ad179..1fb700024e 100644 --- a/lib/codegen/src/predicates.rs +++ b/lib/codegen/src/predicates.rs @@ -46,6 +46,11 @@ pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool } } +#[allow(dead_code)] +pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool { + value_list.len(&func.dfg.value_lists) == num +} + #[cfg(test)] mod tests { use super::*; diff --git a/lib/codegen/src/verifier/mod.rs b/lib/codegen/src/verifier/mod.rs index 6db1f871f7..490222272e 100644 --- a/lib/codegen/src/verifier/mod.rs +++ b/lib/codegen/src/verifier/mod.rs @@ -335,6 +335,12 @@ impl<'a> Verifier<'a> { RegFill { src, .. } => { self.verify_stack_slot(inst, src)?; } + LoadComplex { ref args, .. } => { + self.verify_value_list(inst, args)?; + } + StoreComplex { ref args, .. } => { + self.verify_value_list(inst, args)?; + } // Exhaustive list so we can't forget to add new formats Unary { .. } | @@ -1149,8 +1155,8 @@ impl<'a> Verifier<'a> { mod tests { use super::{Error, Verifier}; use entity::EntityList; - use ir::Function; use ir::instructions::{InstructionData, Opcode}; + use ir::Function; use settings; macro_rules! 
assert_err_with_msg { diff --git a/lib/codegen/src/write.rs b/lib/codegen/src/write.rs index b3887949c2..9a5be48b6f 100644 --- a/lib/codegen/src/write.rs +++ b/lib/codegen/src/write.rs @@ -369,12 +369,44 @@ pub fn write_operands( } => write!(w, " {}, {}{}", arg, stack_slot, offset), HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm), Load { flags, arg, offset, .. } => write!(w, "{} {}{}", flags, arg, offset), + LoadComplex { + flags, + ref args, + offset, + .. + } => { + let args = args.as_slice(pool); + write!( + w, + "{} {}{}", + flags, + DisplayValuesWithDelimiter(&args, '+'), + offset + ) + + } Store { flags, args, offset, .. } => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset), + StoreComplex { + flags, + ref args, + offset, + .. + } => { + let args = args.as_slice(pool); + write!( + w, + "{} {}, {}{}", + flags, + args[0], + DisplayValuesWithDelimiter(&args[1..], '+'), + offset + ) + } RegMove { arg, src, dst, .. } => { if let Some(isa) = isa { let regs = isa.register_info(); @@ -450,6 +482,21 @@ impl<'a> fmt::Display for DisplayValues<'a> { } } +struct DisplayValuesWithDelimiter<'a>(&'a [Value], char); + +impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result { + for (i, val) in self.0.iter().enumerate() { + if i == 0 { + write!(f, "{}", val)?; + } else { + write!(f, "{}{}", self.1, val)?; + } + } + Ok(()) + } +} + #[cfg(test)] mod tests { use ir::types; diff --git a/lib/reader/src/lexer.rs b/lib/reader/src/lexer.rs index f6d89881fb..17b99249fb 100644 --- a/lib/reader/src/lexer.rs +++ b/lib/reader/src/lexer.rs @@ -22,6 +22,7 @@ pub enum Token<'a> { LBracket, // '[' RBracket, // ']' Minus, // '-' + Plus, // '+' Comma, // ',' Dot, // '.' Colon, // ':' @@ -169,6 +170,25 @@ impl<'a> Lexer<'a> { self.source[self.pos..].starts_with(prefix) } + // Starting from `lookahead`, are we looking at a number? 
+ fn looking_at_numeric(&self) -> bool { + if let Some(c) = self.lookahead { + if c.is_digit(10) { + return true; + } + match c { + '-' => return true, + '+' => return true, + '.' => return true, + _ => {} + } + if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") { + return true; + } + } + false + } + // Scan a single-char token. fn scan_char(&mut self, tok: Token<'a>) -> Result, LocatedError> { assert_ne!(self.lookahead, None); @@ -234,16 +254,17 @@ impl<'a> Lexer<'a> { match self.lookahead { Some('-') => { self.next_ch(); - - if let Some(c) = self.lookahead { - // If the next character won't parse as a number, we return Token::Minus - if !c.is_alphanumeric() && c != '.' { - return token(Token::Minus, loc); - } + if !self.looking_at_numeric() { + // If the next characters won't parse as a number, we return Token::Minus + return token(Token::Minus, loc); } } Some('+') => { self.next_ch(); + if !self.looking_at_numeric() { + // If the next characters won't parse as a number, we return Token::Plus + return token(Token::Plus, loc); + } } _ => {} } diff --git a/lib/reader/src/parser.rs b/lib/reader/src/parser.rs index 67e6939910..40af898efc 100644 --- a/lib/reader/src/parser.rs +++ b/lib/reader/src/parser.rs @@ -13,8 +13,8 @@ use cretonne_codegen::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFun Type, Value, ValueLoc}; use cretonne_codegen::isa::{self, Encoding, RegUnit, TargetIsa}; use cretonne_codegen::packed_option::ReservedValue; -use cretonne_codegen::{settings, timing}; use cretonne_codegen::settings::CallConv; +use cretonne_codegen::{settings, timing}; use error::{Error, Location, Result}; use isaspec; use lexer::{self, Lexer, Token}; @@ -1872,6 +1872,24 @@ impl<'a> Parser<'a> { Ok(args) } + fn parse_value_sequence(&mut self) -> Result { + let mut args = VariableArgs::new(); + + if let Some(Token::Value(v)) = self.token() { + args.push(v); + self.consume(); + } else { + return Ok(args); + } + + while 
self.optional(Token::Plus) { + args.push(self.match_value("expected value in argument list")?); + } + + Ok(args) + + } + // Parse an optional value list enclosed in parantheses. fn parse_opt_value_list(&mut self) -> Result { if !self.optional(Token::LPar) { @@ -2267,6 +2285,17 @@ impl<'a> Parser<'a> { offset, } } + InstructionFormat::LoadComplex => { + let flags = self.optional_memflags(); + let args = self.parse_value_sequence()?; + let offset = self.optional_offset32()?; + InstructionData::LoadComplex { + opcode, + flags, + args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists), + offset, + } + } InstructionFormat::Store => { let flags = self.optional_memflags(); let arg = self.match_value("expected SSA value operand")?; @@ -2283,6 +2312,23 @@ impl<'a> Parser<'a> { offset, } } + + InstructionFormat::StoreComplex => { + let flags = self.optional_memflags(); + let src = self.match_value("expected SSA value operand")?; + self.match_token( + Token::Comma, + "expected ',' between operands", + )?; + let args = self.parse_value_sequence()?; + let offset = self.optional_offset32()?; + InstructionData::StoreComplex { + opcode, + flags, + args: args.into_value_list(&[src], &mut ctx.function.dfg.value_lists), + offset, + } + } InstructionFormat::RegMove => { let arg = self.match_value("expected SSA value operand")?; self.match_token( @@ -2402,9 +2448,9 @@ impl<'a> Parser<'a> { #[cfg(test)] mod tests { use super::*; - use cretonne_codegen::ir::StackSlotKind; use cretonne_codegen::ir::entities::AnyEntity; use cretonne_codegen::ir::types; + use cretonne_codegen::ir::StackSlotKind; use cretonne_codegen::ir::{ArgumentExtension, ArgumentPurpose}; use cretonne_codegen::settings::CallConv; use error::Error;