load_complex and store_complex instructions (#309)
* Start adding the load_complex and store_complex instructions. N.b.: The text format is not correct yet. Requires changes to the lexer and parser. I'm not sure why I needed to change the RuntimeError to Exception yet. Will fix. * Get first few encodings of load_complex working. Still needs var args type checking. * Clean up ModRM helper functions in binemit. * Implement 32-bit displace for load_complex * Use encoding helpers instead of doing them all by hand * Initial implementation of store_complex * Parse value list for load/store_complex with + as delimiter. Looks nice. * Add sign/zero-extension and size variants for load_complex. * Add size variants of store_complex. * Add asm helper lines to load/store complex bin tests. * Example of length-checking the instruction ValueList for an encoding. Extremely questionable implementation. * Fix Python linting issues * First draft of postopt pass to fold adds and loads into load_complex. Just simple loads for now. * Optimization pass now works with all types of loads. * Add store+add -> store_complex to postopt pass * Put complex address optimization behind ISA flag. * Add load/store complex for f32 and f64 * Fixes changes to lexer that broke NaN parsing. Abstracts away the repeated checks for whether or not the characters following a + or - are going to be parsed as a number or not. * Fix formatting issues * Fix register restrictions for complex addresses. * Encoding tests for x86-32. * Add documentation for newly added instructions, recipes, and cdsl changes. * Fix python formatting again * Apply value-list length predicates to all LoadComplex and StoreComplex instructions. * Add predicate types to new encoding helpers for mypy. * Import FieldPredicate to satisfy mypy. * Add and fix some "asm" strings in the encoding tests. * Line-up 'bin' comments in x86/binary64 test * Test parsing of offset-less store_complex instruction. * 'sNaN' not 'sNan' * Bounds check the lookup for polymorphic typevar operand. 
* Fix encodings for istore16_complex.
This commit is contained in:
committed by
Dan Gohman
parent
5aa84a744b
commit
f636d795c5
@@ -476,6 +476,11 @@ these instructions is undefined. If it is addressable but not
|
||||
There are also more restricted operations for accessing specific types of memory
|
||||
objects.
|
||||
|
||||
Additionally, instructions are provided for handling multi-register addressing.
|
||||
|
||||
.. autoinst:: load_complex
|
||||
.. autoinst:: store_complex
|
||||
|
||||
Memory operation flags
|
||||
----------------------
|
||||
|
||||
|
||||
@@ -227,6 +227,32 @@ ebb0:
|
||||
; asm: ucomiss %xmm5, %xmm5
|
||||
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
|
||||
|
||||
; Load/Store Complex
|
||||
|
||||
[-,%rax] v350 = iconst.i32 1
|
||||
[-,%rbx] v351 = iconst.i32 2
|
||||
|
||||
; asm: movss (%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18
|
||||
; asm: movss 0x32(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32
|
||||
; asm: movss -0x32(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce
|
||||
; asm: movss 0x2710(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710
|
||||
; asm: movss -0x2710(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0
|
||||
; asm: movss %xmm5,(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18
|
||||
; asm: movss %xmm5,0x32(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32
|
||||
; asm: movss %xmm2,-0x32(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce
|
||||
; asm: movss %xmm5,0x2710(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710
|
||||
; asm: movss %xmm2,-0x2710(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -432,6 +432,37 @@ ebb0:
|
||||
; asm: shrl $8, %esi
|
||||
[-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08
|
||||
|
||||
; Load Complex
|
||||
[-,%rax] v521 = iconst.i32 1
|
||||
[-,%rbx] v522 = iconst.i32 1
|
||||
; asm: movl (%eax,%ebx,1), %ecx
|
||||
[-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18
|
||||
; asm: movl 1(%eax,%ebx,1), %ecx
|
||||
[-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01
|
||||
; asm: mov 0x1000(%eax,%ebx,1),%ecx
|
||||
[-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000
|
||||
; asm: movzbl (%eax,%ebx,1),%ecx
|
||||
[-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18
|
||||
; asm: movsbl (%eax,%ebx,1),%ecx
|
||||
[-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18
|
||||
; asm: movzwl (%eax,%ebx,1),%ecx
|
||||
[-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18
|
||||
; asm: movswl (%eax,%ebx,1),%ecx
|
||||
[-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18
|
||||
|
||||
; Store Complex
|
||||
[-,%rcx] v601 = iconst.i32 1
|
||||
; asm: mov %ecx,(%eax,%ebx,1)
|
||||
store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18
|
||||
; asm: mov %ecx,0x1(%eax,%ebx,1)
|
||||
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01
|
||||
; asm: mov %ecx,0x1000(%eax,%ebx,1)
|
||||
store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
|
||||
; asm: mov %cx,(%eax,%ebx,1)
|
||||
istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18
|
||||
; asm: mov %cl,(%eax,%ebx,1)
|
||||
istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18
|
||||
|
||||
; asm: testl %ecx, %ecx
|
||||
; asm: je ebb1
|
||||
brz v1, ebb1 ; bin: 85 c9 74 0e
|
||||
|
||||
@@ -241,6 +241,34 @@ ebb0:
|
||||
; asm: ucomiss %xmm5, %xmm5
|
||||
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
|
||||
|
||||
|
||||
; Load/Store Complex
|
||||
|
||||
[-,%rax] v350 = iconst.i64 1
|
||||
[-,%rbx] v351 = iconst.i64 2
|
||||
|
||||
; asm: movss (%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18
|
||||
; asm: movss 0x32(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32
|
||||
; asm: movss -0x32(%rax,%rbx,1),%xmm10
|
||||
[-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce
|
||||
; asm: movss 0x2710(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710
|
||||
; asm: movss -0x2710(%rax,%rbx,1),%xmm10
|
||||
[-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0
|
||||
|
||||
; asm: movss %xmm5, (%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18
|
||||
; asm: movss %xmm5, 50(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32
|
||||
; asm: movss %xmm10, -50(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce
|
||||
; asm: movss %xmm5, 10000(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710
|
||||
; asm: movss %xmm10, -10000(%rax,%rbx,1)
|
||||
[-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -476,6 +504,32 @@ ebb0:
|
||||
; asm: ucomisd %xmm5, %xmm5
|
||||
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed
|
||||
|
||||
; Load/Store Complex
|
||||
|
||||
[-,%rax] v350 = iconst.i64 1
|
||||
[-,%rbx] v351 = iconst.i64 2
|
||||
; asm: movsd (%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18
|
||||
; asm: movsd 0x32(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32
|
||||
; asm: movsd -0x32(%rax,%rbx,1),%xmm10
|
||||
[-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce
|
||||
; asm: movsd 0x2710(%rax,%rbx,1),%xmm5
|
||||
[-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710
|
||||
; asm: movsd -0x2710(%rax,%rbx,1),%xmm10
|
||||
[-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0
|
||||
|
||||
; asm: movsd %xmm5, (%rax,%rbx,1)
|
||||
[-] store_complex.f64 v100, v350+v351 ; bin: heap_oob f2 0f 11 2c 18
|
||||
; asm: movsd %xmm5, 50(%rax,%rbx,1)
|
||||
[-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32
|
||||
; asm: movsd %xmm10, -50(%rax,%rbx,1)
|
||||
[-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce
|
||||
; asm: movsd %xmm5, 10000(%rax,%rbx,1)
|
||||
[-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710
|
||||
; asm: movsd %xmm10, -10000(%rax,%rbx,1)
|
||||
[-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -594,6 +594,80 @@ ebb0:
|
||||
[-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f
|
||||
|
||||
|
||||
; Load Complex
|
||||
[-,%rax] v521 = iconst.i64 1
|
||||
[-,%rbx] v522 = iconst.i64 1
|
||||
[-,%rdi] v523 = iconst.i32 1
|
||||
[-,%rsi] v524 = iconst.i32 1
|
||||
; asm: movq (%rax,%rbx,1), %rcx
|
||||
[-,%rcx] v525 = load_complex.i64 v521+v522 ; bin: heap_oob 48 8b 0c 18
|
||||
; asm: movl (%rax,%rbx,1), %ecx
|
||||
[-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18
|
||||
; asm: movq 1(%rax,%rbx,1), %rcx
|
||||
[-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01
|
||||
; asm: movl 1(%rax,%rbx,1), %ecx
|
||||
[-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01
|
||||
; asm: mov 0x1000(%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000
|
||||
; asm: mov 0x1000(%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000
|
||||
; asm: movzbq (%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18
|
||||
; asm: movzbl (%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18
|
||||
; asm: movsbq (%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18
|
||||
; asm: movsbl (%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18
|
||||
; asm: movzwq (%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18
|
||||
; asm: movzwl (%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18
|
||||
; asm: movswq (%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18
|
||||
; asm: movswl (%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18
|
||||
; asm: mov (%rax,%rbx,1),%ecx
|
||||
[-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18
|
||||
; asm: movslq (%rax,%rbx,1),%rcx
|
||||
[-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18
|
||||
[-,%r13] v550 = iconst.i64 1
|
||||
[-,%r14] v551 = iconst.i64 1
|
||||
; asm: mov 0x0(%r13,%r14,1),%r12d
|
||||
[-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00
|
||||
|
||||
; Store Complex
|
||||
[-,%rcx] v600 = iconst.i64 1
|
||||
[-,%rcx] v601 = iconst.i32 1
|
||||
[-,%r10] v602 = iconst.i64 1
|
||||
[-,%r11] v603 = iconst.i32 1
|
||||
; asm: mov %rcx,(%rax,%rbx,1)
|
||||
store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18
|
||||
; asm: mov %rcx,0x1(%rax,%rbx,1)
|
||||
store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01
|
||||
; asm: mov %rcx,0x1000(%rax,%rbx,1)
|
||||
store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000
|
||||
; asm: mov %ecx,(%rax,%rbx,1)
|
||||
store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18
|
||||
; asm: mov %ecx,0x1(%rax,%rbx,1)
|
||||
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01
|
||||
; asm: mov %ecx,0x1000(%rax,%rbx,1)
|
||||
store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
|
||||
; asm: mov %ecx,(%rax,%rbx,1)
|
||||
istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18
|
||||
; asm: mov %cx,(%rax,%rbx,1)
|
||||
istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18
|
||||
; asm: mov %cx,(%rax,%rbx,1)
|
||||
istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18
|
||||
; asm: mov %r10w,(%rax,%rbx,1)
|
||||
istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18
|
||||
; asm: mov %r11w,(%rax,%rbx,1)
|
||||
istore16_complex v603, v521+v522 ; bin: heap_oob 66 44 89 1c 18
|
||||
; asm: mov %cl,(%rax,%rbx,1)
|
||||
istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18
|
||||
; asm: mov %cl,(%rax,%rbx,1)
|
||||
istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18
|
||||
|
||||
; asm: testq %rcx, %rcx
|
||||
; asm: je ebb1
|
||||
brz v1, ebb1 ; bin: 48 85 c9 74 1b
|
||||
|
||||
@@ -158,9 +158,13 @@ ebb0(v1: i32):
|
||||
v6 = load.i64 aligned notrap v1
|
||||
v7 = load.i64 v1-12
|
||||
v8 = load.i64 notrap v1+0x1_0000
|
||||
v9 = load_complex.i64 v1+v2
|
||||
v10 = load_complex.i64 v1+v2+0x1
|
||||
store v2, v1
|
||||
store aligned v3, v1+12
|
||||
store notrap aligned v3, v1-12
|
||||
store_complex v3, v1+v2
|
||||
store_complex v3, v1+v2+0x1
|
||||
}
|
||||
; sameln: function %memory(i32) fast {
|
||||
; nextln: ebb0(v1: i32):
|
||||
@@ -171,9 +175,13 @@ ebb0(v1: i32):
|
||||
; nextln: v6 = load.i64 notrap aligned v1
|
||||
; nextln: v7 = load.i64 v1-12
|
||||
; nextln: v8 = load.i64 notrap v1+0x0001_0000
|
||||
; nextln: v9 = load_complex.i64 v1+v2
|
||||
; nextln: v10 = load_complex.i64 v1+v2+1
|
||||
; nextln: store v2, v1
|
||||
; nextln: store aligned v3, v1+12
|
||||
; nextln: store notrap aligned v3, v1-12
|
||||
; nextln: store_complex v3, v1+v2
|
||||
; nextln: store_complex v3, v1+v2+1
|
||||
|
||||
; Register diversions.
|
||||
; This test file has no ISA, so we can only use register unit numbers.
|
||||
|
||||
95
cranelift/filetests/postopt/complex_memory_ops.cton
Normal file
95
cranelift/filetests/postopt/complex_memory_ops.cton
Normal file
@@ -0,0 +1,95 @@
|
||||
test postopt
|
||||
set is_64bit
|
||||
isa x86
|
||||
|
||||
function %dual_loads(i64, i64) -> i64 {
|
||||
ebb0(v0: i64, v1: i64):
|
||||
[RexOp1rr#8001] v3 = iadd v0, v1
|
||||
v4 = load.i64 v3
|
||||
v5 = uload8.i64 v3
|
||||
v6 = sload8.i64 v3
|
||||
v7 = uload16.i64 v3
|
||||
v8 = sload16.i64 v3
|
||||
v9 = uload32.i64 v3
|
||||
v10 = sload32.i64 v3
|
||||
[Op1ret#c3] return v10
|
||||
}
|
||||
|
||||
; sameln: function %dual_loads
|
||||
; nextln: ebb0(v0: i64, v1: i64):
|
||||
; nextln: v3 = iadd v0, v1
|
||||
; nextln: v4 = load_complex.i64 v0+v1
|
||||
; nextln: v5 = uload8_complex.i64 v0+v1
|
||||
; nextln: v6 = sload8_complex.i64 v0+v1
|
||||
; nextln: v7 = uload16_complex.i64 v0+v1
|
||||
; nextln: v8 = sload16_complex.i64 v0+v1
|
||||
; nextln: v9 = uload32_complex v0+v1
|
||||
; nextln: v10 = sload32_complex v0+v1
|
||||
; nextln: return v10
|
||||
; nextln: }
|
||||
|
||||
function %dual_loads2(i64, i64) -> i64 {
|
||||
ebb0(v0: i64, v1: i64):
|
||||
[RexOp1rr#8001] v3 = iadd v0, v1
|
||||
v4 = load.i64 v3+1
|
||||
v5 = uload8.i64 v3+1
|
||||
v6 = sload8.i64 v3+1
|
||||
v7 = uload16.i64 v3+1
|
||||
v8 = sload16.i64 v3+1
|
||||
v9 = uload32.i64 v3+1
|
||||
v10 = sload32.i64 v3+1
|
||||
[Op1ret#c3] return v10
|
||||
}
|
||||
|
||||
; sameln: function %dual_loads2
|
||||
; nextln: ebb0(v0: i64, v1: i64):
|
||||
; nextln: v3 = iadd v0, v1
|
||||
; nextln: v4 = load_complex.i64 v0+v1+1
|
||||
; nextln: v5 = uload8_complex.i64 v0+v1+1
|
||||
; nextln: v6 = sload8_complex.i64 v0+v1+1
|
||||
; nextln: v7 = uload16_complex.i64 v0+v1+1
|
||||
; nextln: v8 = sload16_complex.i64 v0+v1+1
|
||||
; nextln: v9 = uload32_complex v0+v1+1
|
||||
; nextln: v10 = sload32_complex v0+v1+1
|
||||
; nextln: return v10
|
||||
; nextln: }
|
||||
|
||||
function %dual_stores(i64, i64, i64) {
|
||||
ebb0(v0: i64, v1: i64, v2: i64):
|
||||
[RexOp1rr#8001] v3 = iadd v0, v1
|
||||
[RexOp1st#8089] store.i64 v2, v3
|
||||
[RexOp1st#88] istore8.i64 v2, v3
|
||||
[RexMp1st#189] istore16.i64 v2, v3
|
||||
[RexOp1st#89] istore32.i64 v2, v3
|
||||
[Op1ret#c3] return
|
||||
}
|
||||
|
||||
; sameln: function %dual_stores
|
||||
; nextln: ebb0(v0: i64, v1: i64, v2: i64):
|
||||
; nextln: v3 = iadd v0, v1
|
||||
; nextln: store_complex v2, v0+v1
|
||||
; nextln: istore8_complex v2, v0+v1
|
||||
; nextln: istore16_complex v2, v0+v1
|
||||
; nextln: istore32_complex v2, v0+v1
|
||||
; nextln: return
|
||||
; nextln: }
|
||||
|
||||
function %dual_stores2(i64, i64, i64) {
|
||||
ebb0(v0: i64, v1: i64, v2: i64):
|
||||
[RexOp1rr#8001] v3 = iadd v0, v1
|
||||
[RexOp1stDisp8#8089] store.i64 v2, v3+1
|
||||
[RexOp1stDisp8#88] istore8.i64 v2, v3+1
|
||||
[RexMp1stDisp8#189] istore16.i64 v2, v3+1
|
||||
[RexOp1stDisp8#89] istore32.i64 v2, v3+1
|
||||
[Op1ret#c3] return
|
||||
}
|
||||
|
||||
; sameln: function %dual_stores2
|
||||
; nextln: ebb0(v0: i64, v1: i64, v2: i64):
|
||||
; nextln: v3 = iadd v0, v1
|
||||
; nextln: store_complex v2, v0+v1+1
|
||||
; nextln: istore8_complex v2, v0+v1+1
|
||||
; nextln: istore16_complex v2, v0+v1+1
|
||||
; nextln: istore32_complex v2, v0+v1+1
|
||||
; nextln: return
|
||||
; nextln: }
|
||||
@@ -57,7 +57,9 @@ CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS)
|
||||
FuncAddr = InstructionFormat(func_ref)
|
||||
|
||||
Load = InstructionFormat(memflags, VALUE, offset32)
|
||||
LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32)
|
||||
Store = InstructionFormat(memflags, VALUE, VALUE, offset32)
|
||||
StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32)
|
||||
|
||||
StackLoad = InstructionFormat(stack_slot, offset32)
|
||||
StackStore = InstructionFormat(VALUE, stack_slot, offset32)
|
||||
|
||||
@@ -246,6 +246,7 @@ x = Operand('x', Mem, doc='Value to be stored')
|
||||
a = Operand('a', Mem, doc='Value loaded')
|
||||
p = Operand('p', iAddr)
|
||||
Flags = Operand('Flags', memflags)
|
||||
args = Operand('args', VARIABLE_ARGS, doc='Address arguments')
|
||||
|
||||
load = Instruction(
|
||||
'load', r"""
|
||||
@@ -256,6 +257,15 @@ load = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
load_complex = Instruction(
|
||||
'load_complex', r"""
|
||||
Load from memory at ``sum(args) + Offset``.
|
||||
|
||||
This is a polymorphic instruction that can load any value type which
|
||||
has a memory representation.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
store = Instruction(
|
||||
'store', r"""
|
||||
Store ``x`` to memory at ``p + Offset``.
|
||||
@@ -265,6 +275,16 @@ store = Instruction(
|
||||
""",
|
||||
ins=(Flags, x, p, Offset), can_store=True)
|
||||
|
||||
store_complex = Instruction(
|
||||
'store_complex', r"""
|
||||
Store ``x`` to memory at ``sum(args) + Offset``.
|
||||
|
||||
This is a polymorphic instruction that can store any value type with a
|
||||
memory representation.
|
||||
""",
|
||||
ins=(Flags, x, args, Offset), can_store=True)
|
||||
|
||||
|
||||
iExt8 = TypeVar(
|
||||
'iExt8', 'An integer type with more than 8 bits',
|
||||
ints=(16, 64))
|
||||
@@ -279,6 +299,14 @@ uload8 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
uload8_complex = Instruction(
|
||||
'uload8_complex', r"""
|
||||
Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend.
|
||||
|
||||
This is equivalent to ``load.i8`` followed by ``uextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
sload8 = Instruction(
|
||||
'sload8', r"""
|
||||
Load 8 bits from memory at ``p + Offset`` and sign-extend.
|
||||
@@ -287,6 +315,14 @@ sload8 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
sload8_complex = Instruction(
|
||||
'sload8_complex', r"""
|
||||
Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend.
|
||||
|
||||
This is equivalent to ``load.i8`` followed by ``sextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
istore8 = Instruction(
|
||||
'istore8', r"""
|
||||
Store the low 8 bits of ``x`` to memory at ``p + Offset``.
|
||||
@@ -295,6 +331,14 @@ istore8 = Instruction(
|
||||
""",
|
||||
ins=(Flags, x, p, Offset), can_store=True)
|
||||
|
||||
istore8_complex = Instruction(
|
||||
'istore8_complex', r"""
|
||||
Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``.
|
||||
|
||||
This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
|
||||
""",
|
||||
ins=(Flags, x, args, Offset), can_store=True)
|
||||
|
||||
iExt16 = TypeVar(
|
||||
'iExt16', 'An integer type with more than 16 bits',
|
||||
ints=(32, 64))
|
||||
@@ -309,6 +353,14 @@ uload16 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
uload16_complex = Instruction(
|
||||
'uload16_complex', r"""
|
||||
Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend.
|
||||
|
||||
This is equivalent to ``load.i16`` followed by ``uextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
sload16 = Instruction(
|
||||
'sload16', r"""
|
||||
Load 16 bits from memory at ``p + Offset`` and sign-extend.
|
||||
@@ -317,6 +369,14 @@ sload16 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
sload16_complex = Instruction(
|
||||
'sload16_complex', r"""
|
||||
Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend.
|
||||
|
||||
This is equivalent to ``load.i16`` followed by ``sextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
istore16 = Instruction(
|
||||
'istore16', r"""
|
||||
Store the low 16 bits of ``x`` to memory at ``p + Offset``.
|
||||
@@ -325,6 +385,14 @@ istore16 = Instruction(
|
||||
""",
|
||||
ins=(Flags, x, p, Offset), can_store=True)
|
||||
|
||||
istore16_complex = Instruction(
|
||||
'istore16_complex', r"""
|
||||
Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``.
|
||||
|
||||
This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
|
||||
""",
|
||||
ins=(Flags, x, args, Offset), can_store=True)
|
||||
|
||||
iExt32 = TypeVar(
|
||||
'iExt32', 'An integer type with more than 32 bits',
|
||||
ints=(64, 64))
|
||||
@@ -339,6 +407,14 @@ uload32 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
uload32_complex = Instruction(
|
||||
'uload32_complex', r"""
|
||||
Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend.
|
||||
|
||||
This is equivalent to ``load.i32`` followed by ``uextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
sload32 = Instruction(
|
||||
'sload32', r"""
|
||||
Load 32 bits from memory at ``p + Offset`` and sign-extend.
|
||||
@@ -347,6 +423,14 @@ sload32 = Instruction(
|
||||
""",
|
||||
ins=(Flags, p, Offset), outs=a, can_load=True)
|
||||
|
||||
sload32_complex = Instruction(
|
||||
'sload32_complex', r"""
|
||||
Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend.
|
||||
|
||||
This is equivalent to ``load.i32`` followed by ``sextend``.
|
||||
""",
|
||||
ins=(Flags, args, Offset), outs=a, can_load=True)
|
||||
|
||||
istore32 = Instruction(
|
||||
'istore32', r"""
|
||||
Store the low 32 bits of ``x`` to memory at ``p + Offset``.
|
||||
@@ -355,6 +439,14 @@ istore32 = Instruction(
|
||||
""",
|
||||
ins=(Flags, x, p, Offset), can_store=True)
|
||||
|
||||
istore32_complex = Instruction(
|
||||
'istore32_complex', r"""
|
||||
Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``.
|
||||
|
||||
This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
|
||||
""",
|
||||
ins=(Flags, x, args, Offset), can_store=True)
|
||||
|
||||
x = Operand('x', Mem, doc='Value to be stored')
|
||||
a = Operand('a', Mem, doc='Value loaded')
|
||||
Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot')
|
||||
|
||||
@@ -2,12 +2,12 @@
|
||||
Cretonne predicates that consider `Function` fields.
|
||||
"""
|
||||
from cdsl.predicates import FieldPredicate
|
||||
from .formats import UnaryGlobalVar
|
||||
from .formats import UnaryGlobalVar, InstructionFormat
|
||||
|
||||
try:
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from cdsl.formats import FormatField # noqa
|
||||
from cdsl.formats import InstructionFormat, FormatField # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@@ -33,3 +33,10 @@ class IsColocatedData(FieldPredicate):
|
||||
# type: () -> None
|
||||
super(IsColocatedData, self).__init__(
|
||||
UnaryGlobalVar.global_var, 'is_colocated_data', ('func',))
|
||||
|
||||
|
||||
class LengthEquals(FieldPredicate):
|
||||
def __init__(self, iform, num):
|
||||
# type: (InstructionFormat, int) -> None
|
||||
super(LengthEquals, self).__init__(
|
||||
iform.args(), 'has_length_of', (num, 'func'))
|
||||
|
||||
@@ -103,6 +103,19 @@ class InstructionFormat(object):
|
||||
InstructionFormat._registry[sig] = self
|
||||
InstructionFormat.all_formats.append(self)
|
||||
|
||||
def args(self):
|
||||
# type: () -> FormatField
|
||||
"""
|
||||
Provides a ValueListField, which is derived from FormatField,
|
||||
corresponding to the full ValueList of the instruction format. This
|
||||
is useful for creating predicates for instructions which use variadic
|
||||
arguments.
|
||||
"""
|
||||
|
||||
if self.has_value_list:
|
||||
return ValueListField(self)
|
||||
return None
|
||||
|
||||
def _process_member_names(self, kinds):
|
||||
# type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField] # noqa
|
||||
"""
|
||||
@@ -210,7 +223,7 @@ class FormatField(object):
|
||||
This corresponds to a single member of a variant of the `InstructionData`
|
||||
data type.
|
||||
|
||||
:param iformat: Parent `InstructionFormat`.
|
||||
:param iform: Parent `InstructionFormat`.
|
||||
:param immnum: Immediate operand number in parent.
|
||||
:param kind: Immediate Operand kind.
|
||||
:param member: Member name in `InstructionData` variant.
|
||||
@@ -227,6 +240,29 @@ class FormatField(object):
|
||||
# type: () -> str
|
||||
return '{}.{}'.format(self.format.name, self.member)
|
||||
|
||||
def rust_destructuring_name(self):
|
||||
# type: () -> str
|
||||
return self.member
|
||||
|
||||
def rust_name(self):
|
||||
# type: () -> str
|
||||
return self.member
|
||||
|
||||
|
||||
class ValueListField(FormatField):
|
||||
"""
|
||||
The full value list field of an instruction format.
|
||||
|
||||
This corresponds to all Value-type members of a variant of the
|
||||
`InstructionData` format, which contains a ValueList.
|
||||
|
||||
:param iform: Parent `InstructionFormat`.
|
||||
"""
|
||||
def __init__(self, iform):
|
||||
# type: (InstructionFormat) -> None
|
||||
self.format = iform
|
||||
self.member = "args"
|
||||
|
||||
def rust_destructuring_name(self):
|
||||
# type: () -> str
|
||||
return 'ref {}'.format(self.member)
|
||||
|
||||
@@ -201,9 +201,10 @@ class Instruction(object):
|
||||
# Prefer to use the typevar_operand to infer the controlling typevar.
|
||||
self.use_typevar_operand = False
|
||||
typevar_error = None
|
||||
if self.format.typevar_operand is not None:
|
||||
tv_op = self.format.typevar_operand
|
||||
if tv_op is not None and tv_op < len(self.value_opnums):
|
||||
try:
|
||||
opnum = self.value_opnums[self.format.typevar_operand]
|
||||
opnum = self.value_opnums[tv_op]
|
||||
tv = self.ins[opnum].typevar
|
||||
if tv is tv.free_typevar() or tv.singleton_type() is not None:
|
||||
self.other_typevars = self._verify_ctrl_typevar(tv)
|
||||
|
||||
@@ -27,7 +27,7 @@ def gen_recipe(recipe, fmt):
|
||||
nvops = iform.num_value_operands
|
||||
want_args = any(isinstance(i, RegClass) or isinstance(i, Stack)
|
||||
for i in recipe.ins)
|
||||
assert not want_args or nvops > 0
|
||||
assert not want_args or nvops > 0 or iform.has_value_list
|
||||
want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack)
|
||||
for o in recipe.outs)
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ def emit_instp(instp, fmt, has_func=False):
|
||||
fnames = set() # type: Set[str]
|
||||
for p in leafs:
|
||||
if isinstance(p, FieldPredicate):
|
||||
fnames.add(p.field.rust_name())
|
||||
fnames.add(p.field.rust_destructuring_name())
|
||||
else:
|
||||
assert isinstance(p, TypePredicate)
|
||||
has_type_check = True
|
||||
|
||||
@@ -3,9 +3,9 @@ x86 Encodings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.predicates import IsUnsignedInt, Not, And
|
||||
from base.predicates import IsColocatedFunc, IsColocatedData
|
||||
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
|
||||
from base import instructions as base
|
||||
from base.formats import UnaryImm, FuncAddr, Call
|
||||
from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
|
||||
from .defs import X86_64, X86_32
|
||||
from . import recipes as r
|
||||
from . import settings as cfg
|
||||
@@ -19,6 +19,7 @@ try:
|
||||
from typing import TYPE_CHECKING, Any # noqa
|
||||
if TYPE_CHECKING:
|
||||
from cdsl.instructions import MaybeBoundInst # noqa
|
||||
from cdsl.predicates import FieldPredicate # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@@ -54,6 +55,15 @@ def enc_x86_64(inst, recipe, *args, **kwargs):
|
||||
X86_64.enc(inst, *recipe(*args, **kwargs))
|
||||
|
||||
|
||||
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
|
||||
"""
|
||||
Add encodings for `inst` to X86_64 with and without a REX prefix.
|
||||
"""
|
||||
X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp)
|
||||
X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp)
|
||||
|
||||
|
||||
def enc_both(inst, recipe, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
|
||||
"""
|
||||
@@ -63,6 +73,15 @@ def enc_both(inst, recipe, *args, **kwargs):
|
||||
enc_x86_64(inst, recipe, *args, **kwargs)
|
||||
|
||||
|
||||
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
|
||||
"""
|
||||
Add encodings for `inst` to both X86_32 and X86_64.
|
||||
"""
|
||||
X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp)
|
||||
enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
|
||||
|
||||
|
||||
def enc_i32_i64(inst, recipe, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
|
||||
"""
|
||||
@@ -80,6 +99,25 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
|
||||
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
|
||||
|
||||
|
||||
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
|
||||
"""
|
||||
Add encodings for `inst.i32` to X86_32.
|
||||
Add encodings for `inst.i32` to X86_64 with and without REX.
|
||||
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
|
||||
|
||||
Similar to `enc_i32_i64` but applies `instp` to each encoding.
|
||||
"""
|
||||
X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
|
||||
|
||||
# REX-less encoding must come after REX encoding so we don't use it by
|
||||
# default. Otherwise reg-alloc would never use r8 and up.
|
||||
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp)
|
||||
X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
|
||||
|
||||
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
|
||||
|
||||
|
||||
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
|
||||
"""
|
||||
@@ -212,6 +250,31 @@ X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
|
||||
#
|
||||
# Loads and stores.
|
||||
#
|
||||
|
||||
ldcomplexp = LengthEquals(LoadComplex, 2)
|
||||
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
|
||||
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
|
||||
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
|
||||
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
|
||||
instp=ldcomplexp)
|
||||
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
|
||||
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
|
||||
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
|
||||
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
|
||||
|
||||
stcomplexp = LengthEquals(StoreComplex, 3)
|
||||
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
|
||||
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
|
||||
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
|
||||
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
|
||||
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
|
||||
|
||||
for recipe in [r.stWithIndex_abcd,
|
||||
r.stWithIndexDisp8_abcd,
|
||||
r.stWithIndexDisp32_abcd]:
|
||||
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
|
||||
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
|
||||
|
||||
for recipe in [r.st, r.stDisp8, r.stDisp32]:
|
||||
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
|
||||
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
|
||||
@@ -286,18 +349,34 @@ enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
@@ -14,6 +14,7 @@ from base.formats import IntSelect, IntCondTrap, FloatCondTrap
|
||||
from base.formats import Jump, Branch, BranchInt, BranchFloat
|
||||
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
|
||||
from base.formats import RegMove, RegSpill, RegFill, CopySpecial
|
||||
from base.formats import LoadComplex, StoreComplex
|
||||
from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE
|
||||
from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG
|
||||
from .registers import StackGPR32, StackFPR32
|
||||
@@ -739,6 +740,22 @@ st = TailRecipe(
|
||||
modrm_rm(in_reg1, in_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and no offset.
|
||||
stWithIndex = TailRecipe(
|
||||
'stWithIndex', StoreComplex, size=2,
|
||||
ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsEqual(StoreComplex.offset, 0),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with no offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
st_abcd = TailRecipe(
|
||||
@@ -754,6 +771,23 @@ st_abcd = TailRecipe(
|
||||
modrm_rm(in_reg1, in_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and no offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
stWithIndex_abcd = TailRecipe(
|
||||
'stWithIndex_abcd', StoreComplex, size=2,
|
||||
ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsEqual(StoreComplex.offset, 0),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store of FPR with no offset.
|
||||
fst = TailRecipe(
|
||||
'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
|
||||
@@ -766,6 +800,20 @@ fst = TailRecipe(
|
||||
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
|
||||
modrm_rm(in_reg1, in_reg0, sink);
|
||||
''')
|
||||
# XX /r register-indirect store with index and no offset of FPR.
|
||||
fstWithIndex = TailRecipe(
|
||||
'fstWithIndex', StoreComplex, size=2,
|
||||
ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(),
|
||||
instp=IsEqual(StoreComplex.offset, 0),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 8-bit offset.
|
||||
stDisp8 = TailRecipe(
|
||||
@@ -781,6 +829,27 @@ stDisp8 = TailRecipe(
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 8-bit offset.
|
||||
stWithIndexDisp8 = TailRecipe(
|
||||
'stWithIndexDisp8', StoreComplex, size=3,
|
||||
ins=(GPR, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 8),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp8(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 8-bit offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
stDisp8_abcd = TailRecipe(
|
||||
'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(),
|
||||
instp=IsSignedInt(Store.offset, 8),
|
||||
@@ -795,6 +864,27 @@ stDisp8_abcd = TailRecipe(
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 8-bit offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
stWithIndexDisp8_abcd = TailRecipe(
|
||||
'stWithIndexDisp8_abcd', StoreComplex, size=3,
|
||||
ins=(ABCD, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 8),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp8(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 8-bit offset of FPR.
|
||||
fstDisp8 = TailRecipe(
|
||||
'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(),
|
||||
instp=IsSignedInt(Store.offset, 8),
|
||||
@@ -809,6 +899,24 @@ fstDisp8 = TailRecipe(
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 8-bit offset of FPR.
|
||||
fstWithIndexDisp8 = TailRecipe(
|
||||
'fstWithIndexDisp8', StoreComplex, size=3,
|
||||
ins=(FPR, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 8),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp8(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 32-bit offset.
|
||||
stDisp32 = TailRecipe(
|
||||
'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
|
||||
@@ -822,6 +930,27 @@ stDisp32 = TailRecipe(
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 32-bit offset.
|
||||
stWithIndexDisp32 = TailRecipe(
|
||||
'stWithIndexDisp32', StoreComplex, size=6,
|
||||
ins=(GPR, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 32),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp32(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 32-bit offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
stDisp32_abcd = TailRecipe(
|
||||
'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(),
|
||||
when_prefixed=stDisp32,
|
||||
@@ -835,6 +964,27 @@ stDisp32_abcd = TailRecipe(
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 32-bit offset.
|
||||
# Only ABCD allowed for stored value. This is for byte stores with no REX.
|
||||
stWithIndexDisp32_abcd = TailRecipe(
|
||||
'stWithIndexDisp32_abcd', StoreComplex, size=6,
|
||||
ins=(ABCD, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 32),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp32(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with 32-bit offset of FPR.
|
||||
fstDisp32 = TailRecipe(
|
||||
'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
|
||||
clobbers_flags=False,
|
||||
@@ -848,6 +998,24 @@ fstDisp32 = TailRecipe(
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r register-indirect store with index and 32-bit offset of FPR.
|
||||
fstWithIndexDisp32 = TailRecipe(
|
||||
'fstWithIndexDisp32', StoreComplex, size=6,
|
||||
ins=(FPR, GPR, GPR_DEREF_SAFE),
|
||||
outs=(),
|
||||
instp=IsSignedInt(StoreComplex.offset, 32),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
|
||||
modrm_sib_disp32(in_reg0, sink);
|
||||
sib(0, in_reg2, in_reg1, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# Unary spill with SIB and 32-bit displacement.
|
||||
spillSib32 = TailRecipe(
|
||||
'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
|
||||
@@ -919,6 +1087,22 @@ ld = TailRecipe(
|
||||
modrm_rm(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r load with index and no offset.
|
||||
ldWithIndex = TailRecipe(
|
||||
'ldWithIndex', LoadComplex, size=2,
|
||||
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
|
||||
outs=(GPR),
|
||||
instp=IsEqual(LoadComplex.offset, 0),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r float load with no offset.
|
||||
fld = TailRecipe(
|
||||
'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR),
|
||||
@@ -932,6 +1116,22 @@ fld = TailRecipe(
|
||||
modrm_rm(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r float load with index and no offset.
|
||||
fldWithIndex = TailRecipe(
|
||||
'fldWithIndex', LoadComplex, size=2,
|
||||
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
|
||||
outs=(FPR),
|
||||
instp=IsEqual(LoadComplex.offset, 0),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r load with 8-bit offset.
|
||||
ldDisp8 = TailRecipe(
|
||||
'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR),
|
||||
@@ -947,6 +1147,24 @@ ldDisp8 = TailRecipe(
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r load with index and 8-bit offset.
|
||||
ldWithIndexDisp8 = TailRecipe(
|
||||
'ldWithIndexDisp8', LoadComplex, size=3,
|
||||
ins=(GPR, GPR_DEREF_SAFE),
|
||||
outs=(GPR),
|
||||
instp=IsSignedInt(LoadComplex.offset, 8),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib_disp8(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r float load with 8-bit offset.
|
||||
fldDisp8 = TailRecipe(
|
||||
'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR),
|
||||
@@ -962,6 +1180,24 @@ fldDisp8 = TailRecipe(
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r float load with index and 8-bit offset.
|
||||
fldWithIndexDisp8 = TailRecipe(
|
||||
'fldWithIndexDisp8', LoadComplex, size=3,
|
||||
ins=(GPR, GPR_DEREF_SAFE),
|
||||
outs=(FPR),
|
||||
instp=IsSignedInt(LoadComplex.offset, 8),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib_disp8(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put1(offset as u8);
|
||||
''')
|
||||
|
||||
# XX /r load with 32-bit offset.
|
||||
ldDisp32 = TailRecipe(
|
||||
'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR),
|
||||
@@ -977,6 +1213,24 @@ ldDisp32 = TailRecipe(
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r load with index and 32-bit offset.
|
||||
ldWithIndexDisp32 = TailRecipe(
|
||||
'ldWithIndexDisp32', LoadComplex, size=6,
|
||||
ins=(GPR, GPR_DEREF_SAFE),
|
||||
outs=(GPR),
|
||||
instp=IsSignedInt(LoadComplex.offset, 32),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib_disp32(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r float load with 32-bit offset.
|
||||
fldDisp32 = TailRecipe(
|
||||
'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR),
|
||||
@@ -992,6 +1246,24 @@ fldDisp32 = TailRecipe(
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# XX /r float load with index and 32-bit offset.
|
||||
fldWithIndexDisp32 = TailRecipe(
|
||||
'fldWithIndexDisp32', LoadComplex, size=6,
|
||||
ins=(GPR, GPR_DEREF_SAFE),
|
||||
outs=(FPR),
|
||||
instp=IsSignedInt(LoadComplex.offset, 32),
|
||||
clobbers_flags=False,
|
||||
emit='''
|
||||
if !flags.notrap() {
|
||||
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
|
||||
}
|
||||
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
|
||||
modrm_sib_disp32(out_reg0, sink);
|
||||
sib(0, in_reg1, in_reg0, sink);
|
||||
let offset: i32 = offset.into();
|
||||
sink.put4(offset as u32);
|
||||
''')
|
||||
|
||||
# Unary fill with SIB and 32-bit displacement.
|
||||
fillSib32 = TailRecipe(
|
||||
'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
|
||||
|
||||
@@ -162,6 +162,11 @@ pub trait TargetIsa: fmt::Display {
|
||||
false
|
||||
}
|
||||
|
||||
/// Does the CPU implement multi-register addressing?
|
||||
fn uses_complex_addresses(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Get a data structure describing the registers in this ISA.
|
||||
fn register_info(&self) -> RegInfo;
|
||||
|
||||
|
||||
@@ -46,6 +46,18 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
BASE_REX | b | (r << 2)
|
||||
}
|
||||
|
||||
// Create a three-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
// REX.X = bit 3 of SIB index register.
|
||||
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
let x = ((index >> 3) & 1) as u8;
|
||||
BASE_REX | b | (x << 1) | (r << 2)
|
||||
}
|
||||
|
||||
// Emit a REX prefix.
|
||||
//
|
||||
// The R, X, and B bits are computed from registers using the functions above. The W bit is
|
||||
@@ -211,7 +223,19 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS)
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
|
||||
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
|
||||
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp8(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp32(0b100, reg, sink);
|
||||
}
|
||||
@@ -225,6 +249,16 @@ fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte, laid out as `SS_III_BBB`.
///
/// `scale` must fit in two bits (checked in debug builds); only the low three
/// bits of `index` and `base` are encoded.
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");

    let ss = (scale & 0b11) << 6;
    let iii = (index as u8 & 0b111) << 3;
    let bbb = base as u8 & 0b111;

    sink.put1(ss | iii | bbb);
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for an integer condition code.
|
||||
///
|
||||
/// Add this offset to a base opcode for:
|
||||
|
||||
@@ -62,6 +62,10 @@ impl TargetIsa for Isa {
|
||||
true
|
||||
}
|
||||
|
||||
fn uses_complex_addresses(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
use cursor::{Cursor, EncCursor};
|
||||
use ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::immediates::{Imm64, Offset32};
|
||||
use ir::instructions::{Opcode, ValueList};
|
||||
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
|
||||
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value, Type, MemFlags};
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
@@ -173,6 +173,158 @@ fn optimize_cpu_flags(
|
||||
pos.func.update_encoding(info.br_inst, isa).is_ok();
|
||||
}
|
||||
|
||||
|
||||
struct MemOpInfo {
|
||||
opcode: Opcode,
|
||||
inst: Inst,
|
||||
itype: Type,
|
||||
arg: Value,
|
||||
st_arg: Option<Value>,
|
||||
flags: MemFlags,
|
||||
offset: Offset32,
|
||||
add_args: Option<[Value; 2]>,
|
||||
}
|
||||
|
||||
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
|
||||
let mut info = match pos.func.dfg[inst] {
|
||||
InstructionData::Load {
|
||||
opcode,
|
||||
arg,
|
||||
flags,
|
||||
offset,
|
||||
} => MemOpInfo {
|
||||
opcode: opcode,
|
||||
inst: inst,
|
||||
itype: pos.func.dfg.ctrl_typevar(inst),
|
||||
arg: arg,
|
||||
st_arg: None,
|
||||
flags: flags,
|
||||
offset: offset,
|
||||
add_args: None,
|
||||
},
|
||||
InstructionData::Store {
|
||||
opcode,
|
||||
args,
|
||||
flags,
|
||||
offset,
|
||||
} => MemOpInfo {
|
||||
opcode: opcode,
|
||||
inst: inst,
|
||||
itype: pos.func.dfg.ctrl_typevar(inst),
|
||||
arg: args[1],
|
||||
st_arg: Some(args[0]),
|
||||
flags: flags,
|
||||
offset: offset,
|
||||
add_args: None,
|
||||
},
|
||||
_ => return,
|
||||
};
|
||||
|
||||
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
|
||||
match pos.func.dfg[result_inst] {
|
||||
InstructionData::Binary { opcode, args } if opcode == Opcode::Iadd => {
|
||||
info.add_args = Some(args.clone());
|
||||
}
|
||||
_ => return,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
match info.opcode {
|
||||
Opcode::Load => {
|
||||
pos.func.dfg.replace(info.inst).load_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload8 => {
|
||||
pos.func.dfg.replace(info.inst).uload8_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Sload8 => {
|
||||
pos.func.dfg.replace(info.inst).sload8_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload16 => {
|
||||
pos.func.dfg.replace(info.inst).uload16_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Sload16 => {
|
||||
pos.func.dfg.replace(info.inst).sload16_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload32 => {
|
||||
pos.func.dfg.replace(info.inst).uload32_complex(
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Sload32 => {
|
||||
pos.func.dfg.replace(info.inst).sload32_complex(
|
||||
info.flags,
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Store => {
|
||||
pos.func.dfg.replace(info.inst).store_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore8 => {
|
||||
pos.func.dfg.replace(info.inst).istore8_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore16 => {
|
||||
pos.func.dfg.replace(info.inst).istore16_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore32 => {
|
||||
pos.func.dfg.replace(info.inst).istore32_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&info.add_args.unwrap(),
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
_ => return,
|
||||
}
|
||||
pos.func.update_encoding(info.inst, isa).is_ok();
|
||||
}
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// The main post-opt pass.
|
||||
@@ -198,6 +350,10 @@ pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isa.uses_complex_addresses() {
|
||||
optimize_complex_addresses(&mut pos, inst, isa);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,6 +46,11 @@ pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
|
||||
value_list.len(&func.dfg.value_lists) == num
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -335,6 +335,12 @@ impl<'a> Verifier<'a> {
|
||||
RegFill { src, .. } => {
|
||||
self.verify_stack_slot(inst, src)?;
|
||||
}
|
||||
LoadComplex { ref args, .. } => {
|
||||
self.verify_value_list(inst, args)?;
|
||||
}
|
||||
StoreComplex { ref args, .. } => {
|
||||
self.verify_value_list(inst, args)?;
|
||||
}
|
||||
|
||||
// Exhaustive list so we can't forget to add new formats
|
||||
Unary { .. } |
|
||||
@@ -1149,8 +1155,8 @@ impl<'a> Verifier<'a> {
|
||||
mod tests {
|
||||
use super::{Error, Verifier};
|
||||
use entity::EntityList;
|
||||
use ir::Function;
|
||||
use ir::instructions::{InstructionData, Opcode};
|
||||
use ir::Function;
|
||||
use settings;
|
||||
|
||||
macro_rules! assert_err_with_msg {
|
||||
|
||||
@@ -369,12 +369,44 @@ pub fn write_operands(
|
||||
} => write!(w, " {}, {}{}", arg, stack_slot, offset),
|
||||
HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
|
||||
Load { flags, arg, offset, .. } => write!(w, "{} {}{}", flags, arg, offset),
|
||||
LoadComplex {
|
||||
flags,
|
||||
ref args,
|
||||
offset,
|
||||
..
|
||||
} => {
|
||||
let args = args.as_slice(pool);
|
||||
write!(
|
||||
w,
|
||||
"{} {}{}",
|
||||
flags,
|
||||
DisplayValuesWithDelimiter(&args, '+'),
|
||||
offset
|
||||
)
|
||||
|
||||
}
|
||||
Store {
|
||||
flags,
|
||||
args,
|
||||
offset,
|
||||
..
|
||||
} => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset),
|
||||
StoreComplex {
|
||||
flags,
|
||||
ref args,
|
||||
offset,
|
||||
..
|
||||
} => {
|
||||
let args = args.as_slice(pool);
|
||||
write!(
|
||||
w,
|
||||
"{} {}, {}{}",
|
||||
flags,
|
||||
args[0],
|
||||
DisplayValuesWithDelimiter(&args[1..], '+'),
|
||||
offset
|
||||
)
|
||||
}
|
||||
RegMove { arg, src, dst, .. } => {
|
||||
if let Some(isa) = isa {
|
||||
let regs = isa.register_info();
|
||||
@@ -450,6 +482,21 @@ impl<'a> fmt::Display for DisplayValues<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
struct DisplayValuesWithDelimiter<'a>(&'a [Value], char);
|
||||
|
||||
impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result {
|
||||
for (i, val) in self.0.iter().enumerate() {
|
||||
if i == 0 {
|
||||
write!(f, "{}", val)?;
|
||||
} else {
|
||||
write!(f, "{}{}", self.1, val)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ir::types;
|
||||
|
||||
@@ -22,6 +22,7 @@ pub enum Token<'a> {
|
||||
LBracket, // '['
|
||||
RBracket, // ']'
|
||||
Minus, // '-'
|
||||
Plus, // '+'
|
||||
Comma, // ','
|
||||
Dot, // '.'
|
||||
Colon, // ':'
|
||||
@@ -169,6 +170,25 @@ impl<'a> Lexer<'a> {
|
||||
self.source[self.pos..].starts_with(prefix)
|
||||
}
|
||||
|
||||
// Starting from `lookahead`, are we looking at a number?
|
||||
fn looking_at_numeric(&self) -> bool {
|
||||
if let Some(c) = self.lookahead {
|
||||
if c.is_digit(10) {
|
||||
return true;
|
||||
}
|
||||
match c {
|
||||
'-' => return true,
|
||||
'+' => return true,
|
||||
'.' => return true,
|
||||
_ => {}
|
||||
}
|
||||
if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
// Scan a single-char token.
|
||||
fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
|
||||
assert_ne!(self.lookahead, None);
|
||||
@@ -234,16 +254,17 @@ impl<'a> Lexer<'a> {
|
||||
match self.lookahead {
|
||||
Some('-') => {
|
||||
self.next_ch();
|
||||
|
||||
if let Some(c) = self.lookahead {
|
||||
// If the next character won't parse as a number, we return Token::Minus
|
||||
if !c.is_alphanumeric() && c != '.' {
|
||||
if !self.looking_at_numeric() {
|
||||
// If the next characters won't parse as a number, we return Token::Minus
|
||||
return token(Token::Minus, loc);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some('+') => {
|
||||
self.next_ch();
|
||||
if !self.looking_at_numeric() {
|
||||
// If the next characters won't parse as a number, we return Token::Plus
|
||||
return token(Token::Plus, loc);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@ use cretonne_codegen::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFun
|
||||
Type, Value, ValueLoc};
|
||||
use cretonne_codegen::isa::{self, Encoding, RegUnit, TargetIsa};
|
||||
use cretonne_codegen::packed_option::ReservedValue;
|
||||
use cretonne_codegen::{settings, timing};
|
||||
use cretonne_codegen::settings::CallConv;
|
||||
use cretonne_codegen::{settings, timing};
|
||||
use error::{Error, Location, Result};
|
||||
use isaspec;
|
||||
use lexer::{self, Lexer, Token};
|
||||
@@ -1872,6 +1872,24 @@ impl<'a> Parser<'a> {
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
fn parse_value_sequence(&mut self) -> Result<VariableArgs> {
|
||||
let mut args = VariableArgs::new();
|
||||
|
||||
if let Some(Token::Value(v)) = self.token() {
|
||||
args.push(v);
|
||||
self.consume();
|
||||
} else {
|
||||
return Ok(args);
|
||||
}
|
||||
|
||||
while self.optional(Token::Plus) {
|
||||
args.push(self.match_value("expected value in argument list")?);
|
||||
}
|
||||
|
||||
Ok(args)
|
||||
|
||||
}
|
||||
|
||||
// Parse an optional value list enclosed in parentheses.
|
||||
fn parse_opt_value_list(&mut self) -> Result<VariableArgs> {
|
||||
if !self.optional(Token::LPar) {
|
||||
@@ -2267,6 +2285,17 @@ impl<'a> Parser<'a> {
|
||||
offset,
|
||||
}
|
||||
}
|
||||
InstructionFormat::LoadComplex => {
|
||||
let flags = self.optional_memflags();
|
||||
let args = self.parse_value_sequence()?;
|
||||
let offset = self.optional_offset32()?;
|
||||
InstructionData::LoadComplex {
|
||||
opcode,
|
||||
flags,
|
||||
args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists),
|
||||
offset,
|
||||
}
|
||||
}
|
||||
InstructionFormat::Store => {
|
||||
let flags = self.optional_memflags();
|
||||
let arg = self.match_value("expected SSA value operand")?;
|
||||
@@ -2283,6 +2312,23 @@ impl<'a> Parser<'a> {
|
||||
offset,
|
||||
}
|
||||
}
|
||||
|
||||
InstructionFormat::StoreComplex => {
|
||||
let flags = self.optional_memflags();
|
||||
let src = self.match_value("expected SSA value operand")?;
|
||||
self.match_token(
|
||||
Token::Comma,
|
||||
"expected ',' between operands",
|
||||
)?;
|
||||
let args = self.parse_value_sequence()?;
|
||||
let offset = self.optional_offset32()?;
|
||||
InstructionData::StoreComplex {
|
||||
opcode,
|
||||
flags,
|
||||
args: args.into_value_list(&[src], &mut ctx.function.dfg.value_lists),
|
||||
offset,
|
||||
}
|
||||
}
|
||||
InstructionFormat::RegMove => {
|
||||
let arg = self.match_value("expected SSA value operand")?;
|
||||
self.match_token(
|
||||
@@ -2402,9 +2448,9 @@ impl<'a> Parser<'a> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use cretonne_codegen::ir::StackSlotKind;
|
||||
use cretonne_codegen::ir::entities::AnyEntity;
|
||||
use cretonne_codegen::ir::types;
|
||||
use cretonne_codegen::ir::StackSlotKind;
|
||||
use cretonne_codegen::ir::{ArgumentExtension, ArgumentPurpose};
|
||||
use cretonne_codegen::settings::CallConv;
|
||||
use error::Error;
|
||||
|
||||
Reference in New Issue
Block a user