load_complex and store_complex instructions (#309)

* Start adding the load_complex and store_complex instructions.

N.b.:
The text format is not correct yet. Requires changes to the lexer and parser.
I'm not sure why I needed to change the RuntimeError to Exception yet. Will fix.

* Get first few encodings of load_complex working. Still needs var args type checking.

* Clean up ModRM helper functions in binemit.

* Implement 32-bit displace for load_complex

* Use encoding helpers instead of doing them all by hand

* Initial implementation of store_complex

* Parse value list for load/store_complex with + as delimiter. Looks nice.

* Add sign/zero-extension and size variants for load_complex.

* Add size variants of store_complex.

* Add asm helper lines to load/store complex bin tests.

* Example of length-checking the instruction ValueList for an encoding. Extremely questionable implementation.

* Fix Python linting issues

* First draft of postopt pass to fold adds and loads into load_complex. Just simple loads for now.

* Optimization pass now works with all types of loads.

* Add store+add -> store_complex to postopt pass

* Put complex address optimization behind ISA flag.

* Add load/store complex for f32 and f64

* Fixes changes to lexer that broke NaN parsing.

Abstracts away the repeated checks for whether or not the characters
following a + or - are going to be parsed as a number or not.

* Fix formatting issues

* Fix register restrictions for complex addresses.

* Encoding tests for x86-32.

* Add documentation for newly added instructions, recipes, and cdsl changes.

* Fix python formatting again

* Apply value-list length predicates to all LoadComplex and StoreComplex instructions.

* Add predicate types to new encoding helpers for mypy.

* Import FieldPredicate to satisfy mypy.

* Add and fix some "asm" strings in the encoding tests.

* Line-up 'bin' comments in x86/binary64 test

* Test parsing of offset-less store_complex instruction.

* 'sNaN' not 'sNan'

* Bounds check the lookup for polymorphic typevar operand.

* Fix encodings for istore16_complex.
This commit is contained in:
Tyler McMullen
2018-05-09 12:07:00 -07:00
committed by Dan Gohman
parent 5aa84a744b
commit f636d795c5
25 changed files with 1127 additions and 21 deletions

View File

@@ -476,6 +476,11 @@ these instructions is undefined. If it is addressable but not
There are also more restricted operations for accessing specific types of memory
objects.
Additionally, instructions are provided for handling multi-register addressing.
.. autoinst:: load_complex
.. autoinst:: store_complex
Memory operation flags
----------------------

View File

@@ -227,6 +227,32 @@ ebb0:
; asm: ucomiss %xmm5, %xmm5
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
; Load/Store Complex
[-,%rax] v350 = iconst.i32 1
[-,%rbx] v351 = iconst.i32 2
; asm: movss (%rax,%rbx,1),%xmm5
[-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18
; asm: movss 0x32(%rax,%rbx,1),%xmm5
[-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32
; asm: movss -0x32(%rax,%rbx,1),%xmm5
[-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce
; asm: movss 0x2710(%rax,%rbx,1),%xmm5
[-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710
; asm: movss -0x2710(%rax,%rbx,1),%xmm5
[-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0
; asm: movss %xmm5,(%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18
; asm: movss %xmm5,0x32(%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32
; asm: movss %xmm2,-0x32(%rax,%rbx,1)
[-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce
; asm: movss %xmm5,0x2710(%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710
; asm: movss %xmm2,-0x2710(%rax,%rbx,1)
[-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0
return
}

View File

@@ -432,6 +432,37 @@ ebb0:
; asm: shrl $8, %esi
[-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08
; Load Complex
[-,%rax] v521 = iconst.i32 1
[-,%rbx] v522 = iconst.i32 1
; asm: movl (%eax,%ebx,1), %ecx
[-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18
; asm: movl 1(%eax,%ebx,1), %ecx
[-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01
; asm: mov 0x1000(%eax,%ebx,1),%ecx
[-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000
; asm: movzbl (%eax,%ebx,1),%ecx
[-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18
; asm: movsbl (%eax,%ebx,1),%ecx
[-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18
; asm: movzwl (%eax,%ebx,1),%ecx
[-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18
; asm: movswl (%eax,%ebx,1),%ecx
[-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18
; Store Complex
[-,%rcx] v601 = iconst.i32 1
; asm: mov %ecx,(%eax,%ebx,1)
store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18
; asm: mov %ecx,0x1(%eax,%ebx,1)
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01
; asm: mov %ecx,0x1000(%eax,%ebx,1)
store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
; asm: mov %cx,(%eax,%ebx,1)
istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18
; asm: mov %cl,(%eax,%ebx,1)
istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18
; asm: testl %ecx, %ecx
; asm: je ebb1
brz v1, ebb1 ; bin: 85 c9 74 0e

View File

@@ -241,6 +241,34 @@ ebb0:
; asm: ucomiss %xmm5, %xmm5
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed
; Load/Store Complex
[-,%rax] v350 = iconst.i64 1
[-,%rbx] v351 = iconst.i64 2
; asm: movss (%rax,%rbx,1),%xmm5
[-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18
; asm: movss 0x32(%rax,%rbx,1),%xmm5
[-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32
; asm: movss -0x32(%rax,%rbx,1),%xmm10
[-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce
; asm: movss 0x2710(%rax,%rbx,1),%xmm5
[-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710
; asm: movss -0x2710(%rax,%rbx,1),%xmm10
[-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0
; asm: movss %xmm5, (%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18
; asm: movss %xmm5, 50(%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32
; asm: movss %xmm10, -50(%rax,%rbx,1)
[-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce
; asm: movss %xmm5, 10000(%rax,%rbx,1)
[-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710
; asm: movss %xmm10, -10000(%rax,%rbx,1)
[-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0
return
}
@@ -476,6 +504,32 @@ ebb0:
; asm: ucomisd %xmm5, %xmm5
[-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed
; Load/Store Complex
[-,%rax] v350 = iconst.i64 1
[-,%rbx] v351 = iconst.i64 2
; asm: movsd (%rax,%rbx,1),%xmm5
[-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18
; asm: movsd 0x32(%rax,%rbx,1),%xmm5
[-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32
; asm: movsd -0x32(%rax,%rbx,1),%xmm10
[-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce
; asm: movsd 0x2710(%rax,%rbx,1),%xmm5
[-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710
; asm: movsd -0x2710(%rax,%rbx,1),%xmm10
[-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0
; asm: movsd %xmm5, (%rax,%rbx,1)
[-] store_complex.f64 v100, v350+v351 ; bin: heap_oob f2 0f 11 2c 18
; asm: movsd %xmm5, 50(%rax,%rbx,1)
[-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32
; asm: movsd %xmm10, -50(%rax,%rbx,1)
[-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce
; asm: movsd %xmm5, 10000(%rax,%rbx,1)
[-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710
; asm: movsd %xmm10, -10000(%rax,%rbx,1)
[-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0
return
}

View File

@@ -594,6 +594,80 @@ ebb0:
[-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f
; Load Complex
[-,%rax] v521 = iconst.i64 1
[-,%rbx] v522 = iconst.i64 1
[-,%rdi] v523 = iconst.i32 1
[-,%rsi] v524 = iconst.i32 1
; asm: movq (%rax,%rbx,1), %rcx
[-,%rcx] v525 = load_complex.i64 v521+v522 ; bin: heap_oob 48 8b 0c 18
; asm: movl (%rax,%rbx,1), %ecx
[-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18
; asm: movq 1(%rax,%rbx,1), %rcx
[-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01
; asm: movl 1(%rax,%rbx,1), %ecx
[-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01
; asm: mov 0x1000(%rax,%rbx,1),%rcx
[-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000
; asm: mov 0x1000(%rax,%rbx,1),%ecx
[-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000
; asm: movzbq (%rax,%rbx,1),%rcx
[-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18
; asm: movzbl (%rax,%rbx,1),%ecx
[-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18
; asm: movsbq (%rax,%rbx,1),%rcx
[-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18
; asm: movsbl (%rax,%rbx,1),%ecx
[-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18
; asm: movzwq (%rax,%rbx,1),%rcx
[-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18
; asm: movzwl (%rax,%rbx,1),%ecx
[-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18
; asm: movswq (%rax,%rbx,1),%rcx
[-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18
; asm: movswl (%rax,%rbx,1),%ecx
[-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18
; asm: mov (%rax,%rbx,1),%ecx
[-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18
; asm: movslq (%rax,%rbx,1),%rcx
[-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18
[-,%r13] v550 = iconst.i64 1
[-,%r14] v551 = iconst.i64 1
; asm: mov 0x0(%r13,%r14,1),%r12d
[-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00
; Store Complex
[-,%rcx] v600 = iconst.i64 1
[-,%rcx] v601 = iconst.i32 1
[-,%r10] v602 = iconst.i64 1
[-,%r11] v603 = iconst.i32 1
; asm: mov %rcx,(%rax,%rbx,1)
store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18
; asm: mov %rcx,0x1(%rax,%rbx,1)
store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01
; asm: mov %rcx,0x1000(%rax,%rbx,1)
store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000
; asm: mov %ecx,(%rax,%rbx,1)
store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18
; asm: mov %ecx,0x1(%rax,%rbx,1)
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01
; asm: mov %ecx,0x1000(%rax,%rbx,1)
store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
; asm: mov %ecx,(%rax,%rbx,1)
istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18
; asm: mov %cx,(%rax,%rbx,1)
istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18
; asm: mov %cx,(%rax,%rbx,1)
istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18
; asm: mov %r10w,(%rax,%rbx,1)
istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18
; asm: mov %r11w,(%rax,%rbx,1)
istore16_complex v603, v521+v522 ; bin: heap_oob 66 44 89 1c 18
; asm: mov %cl,(%rax,%rbx,1)
istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18
; asm: mov %cl,(%rax,%rbx,1)
istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18
; asm: testq %rcx, %rcx
; asm: je ebb1
brz v1, ebb1 ; bin: 48 85 c9 74 1b

View File

@@ -158,9 +158,13 @@ ebb0(v1: i32):
v6 = load.i64 aligned notrap v1
v7 = load.i64 v1-12
v8 = load.i64 notrap v1+0x1_0000
v9 = load_complex.i64 v1+v2
v10 = load_complex.i64 v1+v2+0x1
store v2, v1
store aligned v3, v1+12
store notrap aligned v3, v1-12
store_complex v3, v1+v2
store_complex v3, v1+v2+0x1
}
; sameln: function %memory(i32) fast {
; nextln: ebb0(v1: i32):
@@ -171,9 +175,13 @@ ebb0(v1: i32):
; nextln: v6 = load.i64 notrap aligned v1
; nextln: v7 = load.i64 v1-12
; nextln: v8 = load.i64 notrap v1+0x0001_0000
; nextln: v9 = load_complex.i64 v1+v2
; nextln: v10 = load_complex.i64 v1+v2+1
; nextln: store v2, v1
; nextln: store aligned v3, v1+12
; nextln: store notrap aligned v3, v1-12
; nextln: store_complex v3, v1+v2
; nextln: store_complex v3, v1+v2+1
; Register diversions.
; This test file has no ISA, so we can only use register unit numbers.

View File

@@ -0,0 +1,95 @@
test postopt
set is_64bit
isa x86
function %dual_loads(i64, i64) -> i64 {
ebb0(v0: i64, v1: i64):
[RexOp1rr#8001] v3 = iadd v0, v1
v4 = load.i64 v3
v5 = uload8.i64 v3
v6 = sload8.i64 v3
v7 = uload16.i64 v3
v8 = sload16.i64 v3
v9 = uload32.i64 v3
v10 = sload32.i64 v3
[Op1ret#c3] return v10
}
; sameln: function %dual_loads
; nextln: ebb0(v0: i64, v1: i64):
; nextln: v3 = iadd v0, v1
; nextln: v4 = load_complex.i64 v0+v1
; nextln: v5 = uload8_complex.i64 v0+v1
; nextln: v6 = sload8_complex.i64 v0+v1
; nextln: v7 = uload16_complex.i64 v0+v1
; nextln: v8 = sload16_complex.i64 v0+v1
; nextln: v9 = uload32_complex v0+v1
; nextln: v10 = sload32_complex v0+v1
; nextln: return v10
; nextln: }
function %dual_loads2(i64, i64) -> i64 {
ebb0(v0: i64, v1: i64):
[RexOp1rr#8001] v3 = iadd v0, v1
v4 = load.i64 v3+1
v5 = uload8.i64 v3+1
v6 = sload8.i64 v3+1
v7 = uload16.i64 v3+1
v8 = sload16.i64 v3+1
v9 = uload32.i64 v3+1
v10 = sload32.i64 v3+1
[Op1ret#c3] return v10
}
; sameln: function %dual_loads2
; nextln: ebb0(v0: i64, v1: i64):
; nextln: v3 = iadd v0, v1
; nextln: v4 = load_complex.i64 v0+v1+1
; nextln: v5 = uload8_complex.i64 v0+v1+1
; nextln: v6 = sload8_complex.i64 v0+v1+1
; nextln: v7 = uload16_complex.i64 v0+v1+1
; nextln: v8 = sload16_complex.i64 v0+v1+1
; nextln: v9 = uload32_complex v0+v1+1
; nextln: v10 = sload32_complex v0+v1+1
; nextln: return v10
; nextln: }
function %dual_stores(i64, i64, i64) {
ebb0(v0: i64, v1: i64, v2: i64):
[RexOp1rr#8001] v3 = iadd v0, v1
[RexOp1st#8089] store.i64 v2, v3
[RexOp1st#88] istore8.i64 v2, v3
[RexMp1st#189] istore16.i64 v2, v3
[RexOp1st#89] istore32.i64 v2, v3
[Op1ret#c3] return
}
; sameln: function %dual_stores
; nextln: ebb0(v0: i64, v1: i64, v2: i64):
; nextln: v3 = iadd v0, v1
; nextln: store_complex v2, v0+v1
; nextln: istore8_complex v2, v0+v1
; nextln: istore16_complex v2, v0+v1
; nextln: istore32_complex v2, v0+v1
; nextln: return
; nextln: }
function %dual_stores2(i64, i64, i64) {
ebb0(v0: i64, v1: i64, v2: i64):
[RexOp1rr#8001] v3 = iadd v0, v1
[RexOp1stDisp8#8089] store.i64 v2, v3+1
[RexOp1stDisp8#88] istore8.i64 v2, v3+1
[RexMp1stDisp8#189] istore16.i64 v2, v3+1
[RexOp1stDisp8#89] istore32.i64 v2, v3+1
[Op1ret#c3] return
}
; sameln: function %dual_stores2
; nextln: ebb0(v0: i64, v1: i64, v2: i64):
; nextln: v3 = iadd v0, v1
; nextln: store_complex v2, v0+v1+1
; nextln: istore8_complex v2, v0+v1+1
; nextln: istore16_complex v2, v0+v1+1
; nextln: istore32_complex v2, v0+v1+1
; nextln: return
; nextln: }

View File

@@ -57,7 +57,9 @@ CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS)
FuncAddr = InstructionFormat(func_ref)
Load = InstructionFormat(memflags, VALUE, offset32)
LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32)
Store = InstructionFormat(memflags, VALUE, VALUE, offset32)
StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32)
StackLoad = InstructionFormat(stack_slot, offset32)
StackStore = InstructionFormat(VALUE, stack_slot, offset32)

View File

@@ -246,6 +246,7 @@ x = Operand('x', Mem, doc='Value to be stored')
a = Operand('a', Mem, doc='Value loaded')
p = Operand('p', iAddr)
Flags = Operand('Flags', memflags)
args = Operand('args', VARIABLE_ARGS, doc='Address arguments')
load = Instruction(
'load', r"""
@@ -256,6 +257,15 @@ load = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
load_complex = Instruction(
'load_complex', r"""
Load from memory at ``sum(args) + Offset``.
This is a polymorphic instruction that can load any value type which
has a memory representation.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
store = Instruction(
'store', r"""
Store ``x`` to memory at ``p + Offset``.
@@ -265,6 +275,16 @@ store = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
store_complex = Instruction(
'store_complex', r"""
Store ``x`` to memory at ``sum(args) + Offset``.
This is a polymorphic instruction that can store any value type with a
memory representation.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt8 = TypeVar(
'iExt8', 'An integer type with more than 8 bits',
ints=(16, 64))
@@ -279,6 +299,14 @@ uload8 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload8_complex = Instruction(
'uload8_complex', r"""
Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i8`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload8 = Instruction(
'sload8', r"""
Load 8 bits from memory at ``p + Offset`` and sign-extend.
@@ -287,6 +315,14 @@ sload8 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 8-bit load whose address is the sum of a variable list of
# address values plus an immediate offset.
sload8_complex = Instruction(
'sload8_complex', r"""
Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i8`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore8 = Instruction(
'istore8', r"""
Store the low 8 bits of ``x`` to memory at ``p + Offset``.
@@ -295,6 +331,14 @@ istore8 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore8_complex = Instruction(
'istore8_complex', r"""
Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt16 = TypeVar(
'iExt16', 'An integer type with more than 16 bits',
ints=(32, 64))
@@ -309,6 +353,14 @@ uload16 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload16_complex = Instruction(
'uload16_complex', r"""
Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i16`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload16 = Instruction(
'sload16', r"""
Load 16 bits from memory at ``p + Offset`` and sign-extend.
@@ -317,6 +369,14 @@ sload16 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 16-bit load whose address is the sum of a variable list of
# address values plus an immediate offset.
sload16_complex = Instruction(
'sload16_complex', r"""
Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i16`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore16 = Instruction(
'istore16', r"""
Store the low 16 bits of ``x`` to memory at ``p + Offset``.
@@ -325,6 +385,14 @@ istore16 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore16_complex = Instruction(
'istore16_complex', r"""
Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
""",
ins=(Flags, x, args, Offset), can_store=True)
iExt32 = TypeVar(
'iExt32', 'An integer type with more than 32 bits',
ints=(64, 64))
@@ -339,6 +407,14 @@ uload32 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
uload32_complex = Instruction(
'uload32_complex', r"""
Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend.
This is equivalent to ``load.i32`` followed by ``uextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
sload32 = Instruction(
'sload32', r"""
Load 32 bits from memory at ``p + Offset`` and sign-extend.
@@ -347,6 +423,14 @@ sload32 = Instruction(
""",
ins=(Flags, p, Offset), outs=a, can_load=True)
# Sign-extending 32-bit load whose address is the sum of a variable list of
# address values plus an immediate offset.
sload32_complex = Instruction(
'sload32_complex', r"""
Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend.
This is equivalent to ``load.i32`` followed by ``sextend``.
""",
ins=(Flags, args, Offset), outs=a, can_load=True)
istore32 = Instruction(
'istore32', r"""
Store the low 32 bits of ``x`` to memory at ``p + Offset``.
@@ -355,6 +439,14 @@ istore32 = Instruction(
""",
ins=(Flags, x, p, Offset), can_store=True)
istore32_complex = Instruction(
'istore32_complex', r"""
Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``.
This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
""",
ins=(Flags, x, args, Offset), can_store=True)
x = Operand('x', Mem, doc='Value to be stored')
a = Operand('a', Mem, doc='Value loaded')
Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot')

View File

@@ -2,12 +2,12 @@
Cretonne predicates that consider `Function` fields.
"""
from cdsl.predicates import FieldPredicate
from .formats import UnaryGlobalVar
from .formats import UnaryGlobalVar, InstructionFormat
try:
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from cdsl.formats import FormatField # noqa
from cdsl.formats import InstructionFormat, FormatField # noqa
except ImportError:
pass
@@ -33,3 +33,10 @@ class IsColocatedData(FieldPredicate):
# type: () -> None
super(IsColocatedData, self).__init__(
UnaryGlobalVar.global_var, 'is_colocated_data', ('func',))
class LengthEquals(FieldPredicate):
def __init__(self, iform, num):
# type: (InstructionFormat, int) -> None
super(LengthEquals, self).__init__(
iform.args(), 'has_length_of', (num, 'func'))

View File

@@ -103,6 +103,19 @@ class InstructionFormat(object):
InstructionFormat._registry[sig] = self
InstructionFormat.all_formats.append(self)
def args(self):
# type: () -> FormatField
"""
Provides a ValueListField, which is derived from FormatField,
corresponding to the full ValueList of the instruction format. This
is useful for creating predicates for instructions which use variadic
arguments.
"""
if self.has_value_list:
return ValueListField(self)
return None
def _process_member_names(self, kinds):
# type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField] # noqa
"""
@@ -210,7 +223,7 @@ class FormatField(object):
This corresponds to a single member of a variant of the `InstructionData`
data type.
:param iformat: Parent `InstructionFormat`.
:param iform: Parent `InstructionFormat`.
:param immnum: Immediate operand number in parent.
:param kind: Immediate Operand kind.
:param member: Member name in `InstructionData` variant.
@@ -227,6 +240,29 @@ class FormatField(object):
# type: () -> str
return '{}.{}'.format(self.format.name, self.member)
def rust_destructuring_name(self):
# type: () -> str
return self.member
def rust_name(self):
# type: () -> str
return self.member
class ValueListField(FormatField):
"""
The full value list field of an instruction format.
This corresponds to all Value-type members of a variant of the
`InstructionData` format, which contains a ValueList.
:param iform: Parent `InstructionFormat`.
"""
def __init__(self, iform):
# type: (InstructionFormat) -> None
self.format = iform
self.member = "args"
def rust_destructuring_name(self):
# type: () -> str
return 'ref {}'.format(self.member)

View File

@@ -201,9 +201,10 @@ class Instruction(object):
# Prefer to use the typevar_operand to infer the controlling typevar.
self.use_typevar_operand = False
typevar_error = None
if self.format.typevar_operand is not None:
tv_op = self.format.typevar_operand
if tv_op is not None and tv_op < len(self.value_opnums):
try:
opnum = self.value_opnums[self.format.typevar_operand]
opnum = self.value_opnums[tv_op]
tv = self.ins[opnum].typevar
if tv is tv.free_typevar() or tv.singleton_type() is not None:
self.other_typevars = self._verify_ctrl_typevar(tv)

View File

@@ -27,7 +27,7 @@ def gen_recipe(recipe, fmt):
nvops = iform.num_value_operands
want_args = any(isinstance(i, RegClass) or isinstance(i, Stack)
for i in recipe.ins)
assert not want_args or nvops > 0
assert not want_args or nvops > 0 or iform.has_value_list
want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack)
for o in recipe.outs)

View File

@@ -103,7 +103,7 @@ def emit_instp(instp, fmt, has_func=False):
fnames = set() # type: Set[str]
for p in leafs:
if isinstance(p, FieldPredicate):
fnames.add(p.field.rust_name())
fnames.add(p.field.rust_destructuring_name())
else:
assert isinstance(p, TypePredicate)
has_type_check = True

View File

@@ -3,9 +3,9 @@ x86 Encodings.
"""
from __future__ import absolute_import
from cdsl.predicates import IsUnsignedInt, Not, And
from base.predicates import IsColocatedFunc, IsColocatedData
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
from base import instructions as base
from base.formats import UnaryImm, FuncAddr, Call
from base.formats import UnaryImm, FuncAddr, Call, LoadComplex, StoreComplex
from .defs import X86_64, X86_32
from . import recipes as r
from . import settings as cfg
@@ -19,6 +19,7 @@ try:
from typing import TYPE_CHECKING, Any # noqa
if TYPE_CHECKING:
from cdsl.instructions import MaybeBoundInst # noqa
from cdsl.predicates import FieldPredicate # noqa
except ImportError:
pass
@@ -54,6 +55,15 @@ def enc_x86_64(inst, recipe, *args, **kwargs):
X86_64.enc(inst, *recipe(*args, **kwargs))
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp)
def enc_both(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
"""
@@ -63,6 +73,15 @@ def enc_both(inst, recipe, *args, **kwargs):
enc_x86_64(inst, recipe, *args, **kwargs)
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
"""
Add encodings for `inst` to both X86_32 and X86_64.
"""
X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp)
enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
def enc_i32_i64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
@@ -80,6 +99,25 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
Similar to `enc_i32_i64` but applies `instp` to each encoding.
"""
X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
"""
@@ -212,6 +250,31 @@ X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
#
ldcomplexp = LengthEquals(LoadComplex, 2)
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
instp=ldcomplexp)
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
stcomplexp = LengthEquals(StoreComplex, 3)
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
for recipe in [r.stWithIndex_abcd,
r.stWithIndexDisp8_abcd,
r.stWithIndexDisp32_abcd]:
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
for recipe in [r.st, r.stDisp8, r.stDisp32]:
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
@@ -286,18 +349,34 @@ enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)

View File

@@ -14,6 +14,7 @@ from base.formats import IntSelect, IntCondTrap, FloatCondTrap
from base.formats import Jump, Branch, BranchInt, BranchFloat
from base.formats import Ternary, FuncAddr, UnaryGlobalVar
from base.formats import RegMove, RegSpill, RegFill, CopySpecial
from base.formats import LoadComplex, StoreComplex
from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE
from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG
from .registers import StackGPR32, StackFPR32
@@ -739,6 +740,22 @@ st = TailRecipe(
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset.
stWithIndex = TailRecipe(
'stWithIndex', StoreComplex, size=2,
ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store with no offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
st_abcd = TailRecipe(
@@ -754,6 +771,23 @@ st_abcd = TailRecipe(
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
#
# Operands are (value, base, index): the stored value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The recipe only matches when the instruction's offset is 0, so no
# displacement bytes are emitted (size=2: one ModR/M byte plus one SIB byte).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
stWithIndex_abcd = TailRecipe(
'stWithIndex_abcd', StoreComplex, size=2,
ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store of FPR with no offset.
fst = TailRecipe(
'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(),
@@ -766,6 +800,20 @@ fst = TailRecipe(
PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
modrm_rm(in_reg1, in_reg0, sink);
''')
# XX /r register-indirect store with index and no offset of FPR.
#
# Operands are (value, base, index): the stored FPR value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The recipe only matches when the instruction's offset is 0, so no
# displacement bytes are emitted (size=2: one ModR/M byte plus one SIB byte).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fstWithIndex = TailRecipe(
'fstWithIndex', StoreComplex, size=2,
ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(),
instp=IsEqual(StoreComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
''')
# XX /r register-indirect store with 8-bit offset.
stDisp8 = TailRecipe(
@@ -781,6 +829,27 @@ stDisp8 = TailRecipe(
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset.
#
# Operands are (value, base, index): the stored value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset must fit in a signed 8-bit integer and is emitted as a
# single displacement byte after the SIB byte (size=3).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
stWithIndexDisp8 = TailRecipe(
'stWithIndexDisp8', StoreComplex, size=3,
ins=(GPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 8-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stDisp8_abcd = TailRecipe(
'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(),
instp=IsSignedInt(Store.offset, 8),
@@ -795,6 +864,27 @@ stDisp8_abcd = TailRecipe(
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
#
# Operands are (value, base, index): the stored value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset must fit in a signed 8-bit integer and is emitted as a
# single displacement byte after the SIB byte (size=3).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
stWithIndexDisp8_abcd = TailRecipe(
'stWithIndexDisp8_abcd', StoreComplex, size=3,
ins=(ABCD, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 8-bit offset of FPR.
fstDisp8 = TailRecipe(
'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(),
instp=IsSignedInt(Store.offset, 8),
@@ -809,6 +899,24 @@ fstDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r register-indirect store with index and 8-bit offset of FPR.
#
# Operands are (value, base, index): the stored FPR value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset must fit in a signed 8-bit integer and is emitted as a
# single displacement byte after the SIB byte (size=3).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fstWithIndexDisp8 = TailRecipe(
'fstWithIndexDisp8', StoreComplex, size=3,
ins=(FPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp8(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r register-indirect store with 32-bit offset.
stDisp32 = TailRecipe(
'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(),
@@ -822,6 +930,27 @@ stDisp32 = TailRecipe(
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset.
#
# Operands are (value, base, index): the stored value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset is checked as a signed 32-bit integer and emitted as four
# displacement bytes after the SIB byte (size=6).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
stWithIndexDisp32 = TailRecipe(
'stWithIndexDisp32', StoreComplex, size=6,
ins=(GPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with 32-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
stDisp32_abcd = TailRecipe(
'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(),
when_prefixed=stDisp32,
@@ -835,6 +964,27 @@ stDisp32_abcd = TailRecipe(
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset.
# Only ABCD allowed for stored value. This is for byte stores with no REX.
#
# Operands are (value, base, index): the stored value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset is checked as a signed 32-bit integer and emitted as four
# displacement bytes after the SIB byte (size=6).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
stWithIndexDisp32_abcd = TailRecipe(
'stWithIndexDisp32_abcd', StoreComplex, size=6,
ins=(ABCD, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r register-indirect store with 32-bit offset of FPR.
fstDisp32 = TailRecipe(
'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(),
clobbers_flags=False,
@@ -848,6 +998,24 @@ fstDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r register-indirect store with index and 32-bit offset of FPR.
#
# Operands are (value, base, index): the stored FPR value is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset is checked as a signed 32-bit integer and emitted as four
# displacement bytes after the SIB byte (size=6).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fstWithIndexDisp32 = TailRecipe(
'fstWithIndexDisp32', StoreComplex, size=6,
ins=(FPR, GPR, GPR_DEREF_SAFE),
outs=(),
instp=IsSignedInt(StoreComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
modrm_sib_disp32(in_reg0, sink);
sib(0, in_reg2, in_reg1, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# Unary spill with SIB and 32-bit displacement.
spillSib32 = TailRecipe(
'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
@@ -919,6 +1087,22 @@ ld = TailRecipe(
modrm_rm(in_reg0, out_reg0, sink);
''')
# XX /r load with index and no offset.
#
# Operands are (base, index): the destination register is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The recipe only matches when the instruction's offset is 0, so no
# displacement bytes are emitted (size=2: one ModR/M byte plus one SIB byte).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
ldWithIndex = TailRecipe(
'ldWithIndex', LoadComplex, size=2,
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsEqual(LoadComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
''')
# XX /r float load with no offset.
fld = TailRecipe(
'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR),
@@ -932,6 +1116,22 @@ fld = TailRecipe(
modrm_rm(in_reg0, out_reg0, sink);
''')
# XX /r float load with index and no offset.
#
# Operands are (base, index): the destination FPR is encoded in the ModR/M
# reg field and the SIB byte addresses base + index with a scale field of 0.
# The recipe only matches when the instruction's offset is 0, so no
# displacement bytes are emitted (size=2: one ModR/M byte plus one SIB byte).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fldWithIndex = TailRecipe(
'fldWithIndex', LoadComplex, size=2,
ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsEqual(LoadComplex.offset, 0),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
''')
# XX /r load with 8-bit offset.
ldDisp8 = TailRecipe(
'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -947,6 +1147,24 @@ ldDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r load with index and 8-bit offset.
#
# Operands are (base, index): the destination register is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset must fit in a signed 8-bit integer and is emitted as a
# single displacement byte after the SIB byte (size=3).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
ldWithIndexDisp8 = TailRecipe(
'ldWithIndexDisp8', LoadComplex, size=3,
ins=(GPR, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsSignedInt(LoadComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp8(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r float load with 8-bit offset.
fldDisp8 = TailRecipe(
'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -962,6 +1180,24 @@ fldDisp8 = TailRecipe(
sink.put1(offset as u8);
''')
# XX /r float load with index and 8-bit offset.
#
# Operands are (base, index): the destination FPR is encoded in the ModR/M
# reg field and the SIB byte addresses base + index with a scale field of 0.
# The offset must fit in a signed 8-bit integer and is emitted as a single
# displacement byte after the SIB byte (size=3).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fldWithIndexDisp8 = TailRecipe(
'fldWithIndexDisp8', LoadComplex, size=3,
ins=(GPR, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsSignedInt(LoadComplex.offset, 8),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp8(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put1(offset as u8);
''')
# XX /r load with 32-bit offset.
ldDisp32 = TailRecipe(
'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR),
@@ -977,6 +1213,24 @@ ldDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r load with index and 32-bit offset.
#
# Operands are (base, index): the destination register is encoded in the
# ModR/M reg field and the SIB byte addresses base + index with a scale field
# of 0. The offset is checked as a signed 32-bit integer and emitted as four
# displacement bytes after the SIB byte (size=6).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
ldWithIndexDisp32 = TailRecipe(
'ldWithIndexDisp32', LoadComplex, size=6,
ins=(GPR, GPR_DEREF_SAFE),
outs=(GPR),
instp=IsSignedInt(LoadComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp32(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# XX /r float load with 32-bit offset.
fldDisp32 = TailRecipe(
'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR),
@@ -992,6 +1246,24 @@ fldDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# XX /r float load with index and 32-bit offset.
#
# Operands are (base, index): the destination FPR is encoded in the ModR/M
# reg field and the SIB byte addresses base + index with a scale field of 0.
# The offset is checked as a signed 32-bit integer and emitted as four
# displacement bytes after the SIB byte (size=6).
# A HeapOutOfBounds trap is recorded unless the memory flags say notrap.
fldWithIndexDisp32 = TailRecipe(
'fldWithIndexDisp32', LoadComplex, size=6,
ins=(GPR, GPR_DEREF_SAFE),
outs=(FPR),
instp=IsSignedInt(LoadComplex.offset, 32),
clobbers_flags=False,
emit='''
if !flags.notrap() {
sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
}
PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
modrm_sib_disp32(out_reg0, sink);
sib(0, in_reg1, in_reg0, sink);
let offset: i32 = offset.into();
sink.put4(offset as u32);
''')
# Unary fill with SIB and 32-bit displacement.
fillSib32 = TailRecipe(
'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR,

View File

@@ -162,6 +162,11 @@ pub trait TargetIsa: fmt::Display {
false
}
/// Does the CPU implement multi-register addressing?
///
/// The default implementation returns `false`. An ISA that returns `true` opts in to the
/// post-optimization step that rewrites loads and stores whose address is computed by an
/// `iadd` into the corresponding `*_complex` instructions.
fn uses_complex_addresses(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;

View File

@@ -46,6 +46,18 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
BASE_REX | b | (r << 2)
}
// Build the REX prefix for an instruction that names three registers.
//
// Bit 3 of each register number is moved into the matching REX extension bit:
//
// REX.B <- `rm`: the r/m register, or the SIB base register when a SIB byte is present.
// REX.X <- `index`: the SIB index register.
// REX.R <- `reg`: the reg register.
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
    // Extract bit 3 of a register unit as a 0/1 byte.
    let high_bit = |unit: RegUnit| ((unit >> 3) & 1) as u8;
    BASE_REX | high_bit(rm) | (high_bit(index) << 1) | (high_bit(reg) << 2)
}
// Emit a REX prefix.
//
// The R, X, and B bits are computed from registers using the functions above. The W bit is
@@ -211,7 +223,19 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS)
sink.put1(b);
}
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
///
/// `reg` is placed in the ModR/M reg field. Only the ModR/M byte is written here; the caller
/// emits the SIB byte itself afterwards.
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b100, reg, sink);
}
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
/// displacement are present.
///
/// `reg` is placed in the ModR/M reg field. Only the ModR/M byte is written here; the caller
/// emits the SIB byte and the displacement byte afterwards.
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp8(0b100, reg, sink);
}
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
/// displacement are present.
///
/// `reg` is placed in the ModR/M reg field. Only the ModR/M byte is written here; the caller
/// emits the SIB byte and the four displacement bytes afterwards.
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp32(0b100, reg, sink);
}
@@ -225,6 +249,16 @@ fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
sink.put1(b);
}
/// Emit a SIB byte laid out as `SS_III_BBB`: a 2-bit scale field, the low three bits of the
/// index register, and the low three bits of the base register.
///
/// `scale` is the raw 2-bit field value; anything above 3 trips a debug assertion and is
/// masked down in release builds.
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
    let scale_bits = (scale & 3) << 6;
    let index_bits = (index as u8 & 7) << 3;
    let base_bits = base as u8 & 7;
    sink.put1(scale_bits | index_bits | base_bits);
}
/// Get the low 4 bits of an opcode for an integer condition code.
///
/// Add this offset to a base opcode for:

View File

@@ -62,6 +62,10 @@ impl TargetIsa for Isa {
true
}
// Opt in to the post-optimization rewrite that folds `iadd` address computations into
// `*_complex` loads and stores.
fn uses_complex_addresses(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}

View File

@@ -5,9 +5,9 @@
use cursor::{Cursor, EncCursor};
use ir::condcodes::{CondCode, FloatCC, IntCC};
use ir::dfg::ValueDef;
use ir::immediates::Imm64;
use ir::immediates::{Imm64, Offset32};
use ir::instructions::{Opcode, ValueList};
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value, Type, MemFlags};
use isa::TargetIsa;
use timing;
@@ -173,6 +173,158 @@ fn optimize_cpu_flags(
pos.func.update_encoding(info.br_inst, isa).is_ok();
}
/// Everything `optimize_complex_addresses` needs to know about one plain load or store in
/// order to rebuild it as the matching `*_complex` instruction.
struct MemOpInfo {
/// Opcode of the original load or store; selects which `*_complex` builder is used.
opcode: Opcode,
/// The instruction being rewritten in place.
inst: Inst,
/// Controlling type variable of `inst`.
itype: Type,
/// The address operand (the sole argument of a load, the second argument of a store).
arg: Value,
/// The value being stored (first argument of a store); `None` for loads.
st_arg: Option<Value>,
/// Memory flags copied from the original instruction.
flags: MemFlags,
/// Immediate offset copied from the original instruction.
offset: Offset32,
/// The two operands of the `iadd` that defines `arg`, once such an `iadd` has been found.
add_args: Option<[Value; 2]>,
}
/// Fold an `iadd` that computes the address of a load or store into the memory instruction.
///
/// When `inst` is a plain load or store whose address operand is the result of an `iadd`, the
/// instruction is replaced in place by the matching `*_complex` form, which takes the two
/// `iadd` operands as its address list. Flags, offset, controlling type and (for stores) the
/// stored value are carried over unchanged. The `iadd` itself is left where it is. Any other
/// instruction is left untouched.
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
    // Gather the operands of a plain load or store; every other format is ignored.
    let mut info = match pos.func.dfg[inst] {
        InstructionData::Load {
            opcode,
            arg,
            flags,
            offset,
        } => MemOpInfo {
            opcode,
            inst,
            itype: pos.func.dfg.ctrl_typevar(inst),
            arg,
            st_arg: None,
            flags,
            offset,
            add_args: None,
        },
        InstructionData::Store {
            opcode,
            args,
            flags,
            offset,
        } => MemOpInfo {
            opcode,
            inst,
            itype: pos.func.dfg.ctrl_typevar(inst),
            arg: args[1],
            st_arg: Some(args[0]),
            flags,
            offset,
            add_args: None,
        },
        _ => return,
    };

    // The address has to be produced by an `iadd`; remember its two operands.
    info.add_args = match pos.func.dfg.value_def(info.arg) {
        ValueDef::Result(def_inst, _) => match pos.func.dfg[def_inst] {
            InstructionData::Binary {
                opcode: Opcode::Iadd,
                args,
            } => Some(args),
            _ => None,
        },
        _ => None,
    };
    let addr_args = match info.add_args {
        Some(pair) => pair,
        None => return,
    };

    // Swap the instruction for its complex-address counterpart. The builders differ in
    // whether they take a controlling type and a stored value, so each opcode is spelled out.
    match info.opcode {
        Opcode::Load => {
            pos.func
                .dfg
                .replace(info.inst)
                .load_complex(info.itype, info.flags, &addr_args, info.offset);
        }
        Opcode::Uload8 => {
            pos.func
                .dfg
                .replace(info.inst)
                .uload8_complex(info.itype, info.flags, &addr_args, info.offset);
        }
        Opcode::Sload8 => {
            pos.func
                .dfg
                .replace(info.inst)
                .sload8_complex(info.itype, info.flags, &addr_args, info.offset);
        }
        Opcode::Uload16 => {
            pos.func
                .dfg
                .replace(info.inst)
                .uload16_complex(info.itype, info.flags, &addr_args, info.offset);
        }
        Opcode::Sload16 => {
            pos.func
                .dfg
                .replace(info.inst)
                .sload16_complex(info.itype, info.flags, &addr_args, info.offset);
        }
        Opcode::Uload32 => {
            pos.func
                .dfg
                .replace(info.inst)
                .uload32_complex(info.flags, &addr_args, info.offset);
        }
        Opcode::Sload32 => {
            pos.func
                .dfg
                .replace(info.inst)
                .sload32_complex(info.flags, &addr_args, info.offset);
        }
        Opcode::Store => {
            pos.func.dfg.replace(info.inst).store_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_args,
                info.offset,
            );
        }
        Opcode::Istore8 => {
            pos.func.dfg.replace(info.inst).istore8_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_args,
                info.offset,
            );
        }
        Opcode::Istore16 => {
            pos.func.dfg.replace(info.inst).istore16_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_args,
                info.offset,
            );
        }
        Opcode::Istore32 => {
            pos.func.dfg.replace(info.inst).istore32_complex(
                info.flags,
                info.st_arg.unwrap(),
                &addr_args,
                info.offset,
            );
        }
        _ => return,
    }

    // NOTE(review): the result is discarded, so a failure to find an encoding for the new
    // instruction goes unreported here — confirm that this is intended.
    pos.func.update_encoding(info.inst, isa).is_ok();
}
//----------------------------------------------------------------------
//
// The main post-opt pass.
@@ -198,6 +350,10 @@ pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}

View File

@@ -46,6 +46,11 @@ pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool
}
}
/// Check whether `value_list` holds exactly `num` values.
///
/// The list's length is looked up in the value-list pool of `func`'s data flow graph.
// NOTE(review): presumably only called from generated encoding-predicate code, which would
// explain the `allow(dead_code)` — confirm.
#[allow(dead_code)]
pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
value_list.len(&func.dfg.value_lists) == num
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -335,6 +335,12 @@ impl<'a> Verifier<'a> {
RegFill { src, .. } => {
self.verify_stack_slot(inst, src)?;
}
LoadComplex { ref args, .. } => {
self.verify_value_list(inst, args)?;
}
StoreComplex { ref args, .. } => {
self.verify_value_list(inst, args)?;
}
// Exhaustive list so we can't forget to add new formats
Unary { .. } |
@@ -1149,8 +1155,8 @@ impl<'a> Verifier<'a> {
mod tests {
use super::{Error, Verifier};
use entity::EntityList;
use ir::Function;
use ir::instructions::{InstructionData, Opcode};
use ir::Function;
use settings;
macro_rules! assert_err_with_msg {

View File

@@ -369,12 +369,44 @@ pub fn write_operands(
} => write!(w, " {}, {}{}", arg, stack_slot, offset),
HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
Load { flags, arg, offset, .. } => write!(w, "{} {}{}", flags, arg, offset),
LoadComplex {
flags,
ref args,
offset,
..
} => {
let args = args.as_slice(pool);
write!(
w,
"{} {}{}",
flags,
DisplayValuesWithDelimiter(&args, '+'),
offset
)
}
Store {
flags,
args,
offset,
..
} => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset),
StoreComplex {
flags,
ref args,
offset,
..
} => {
let args = args.as_slice(pool);
write!(
w,
"{} {}, {}{}",
flags,
args[0],
DisplayValuesWithDelimiter(&args[1..], '+'),
offset
)
}
RegMove { arg, src, dst, .. } => {
if let Some(isa) = isa {
let regs = isa.register_info();
@@ -450,6 +482,21 @@ impl<'a> fmt::Display for DisplayValues<'a> {
}
}
/// Displays a slice of values separated by a single delimiter character, with no delimiter
/// before the first or after the last value. An empty slice displays as nothing.
struct DisplayValuesWithDelimiter<'a>(&'a [Value], char);

impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result {
        let mut values = self.0.iter();
        // The first value is written bare; every later one is preceded by the delimiter.
        if let Some(first) = values.next() {
            write!(f, "{}", first)?;
            for val in values {
                write!(f, "{}{}", self.1, val)?;
            }
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use ir::types;

View File

@@ -22,6 +22,7 @@ pub enum Token<'a> {
LBracket, // '['
RBracket, // ']'
Minus, // '-'
Plus, // '+'
Comma, // ','
Dot, // '.'
Colon, // ':'
@@ -169,6 +170,25 @@ impl<'a> Lexer<'a> {
self.source[self.pos..].starts_with(prefix)
}
// Does the input starting at `lookahead` look like the beginning of a number?
//
// A decimal digit, a sign, a '.', or one of the special float spellings "NaN", "Inf" and
// "sNaN" all count. With no lookahead character the answer is always `false`.
fn looking_at_numeric(&self) -> bool {
    match self.lookahead {
        None => false,
        Some(c) => {
            c.is_digit(10)
                || c == '-'
                || c == '+'
                || c == '.'
                || self.looking_at("NaN")
                || self.looking_at("Inf")
                || self.looking_at("sNaN")
        }
    }
}
// Scan a single-char token.
fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
assert_ne!(self.lookahead, None);
@@ -234,16 +254,17 @@ impl<'a> Lexer<'a> {
match self.lookahead {
Some('-') => {
self.next_ch();
if let Some(c) = self.lookahead {
// If the next character won't parse as a number, we return Token::Minus
if !c.is_alphanumeric() && c != '.' {
if !self.looking_at_numeric() {
// If the next characters won't parse as a number, we return Token::Minus
return token(Token::Minus, loc);
}
}
}
Some('+') => {
self.next_ch();
if !self.looking_at_numeric() {
// If the next characters won't parse as a number, we return Token::Plus
return token(Token::Plus, loc);
}
}
_ => {}
}

View File

@@ -13,8 +13,8 @@ use cretonne_codegen::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFun
Type, Value, ValueLoc};
use cretonne_codegen::isa::{self, Encoding, RegUnit, TargetIsa};
use cretonne_codegen::packed_option::ReservedValue;
use cretonne_codegen::{settings, timing};
use cretonne_codegen::settings::CallConv;
use cretonne_codegen::{settings, timing};
use error::{Error, Location, Result};
use isaspec;
use lexer::{self, Lexer, Token};
@@ -1872,6 +1872,24 @@ impl<'a> Parser<'a> {
Ok(args)
}
// Parse a possibly empty sequence of values separated by '+', as used for the address
// operands of the complex load and store instructions.
//
// If the current token is not a value, nothing is consumed and an empty list is returned.
// After each '+' a value is required.
fn parse_value_sequence(&mut self) -> Result<VariableArgs> {
    let mut args = VariableArgs::new();

    match self.token() {
        Some(Token::Value(first)) => {
            args.push(first);
            self.consume();
        }
        _ => return Ok(args),
    }

    while self.optional(Token::Plus) {
        let next = self.match_value("expected value in argument list")?;
        args.push(next);
    }

    Ok(args)
}
// Parse an optional value list enclosed in parentheses.
fn parse_opt_value_list(&mut self) -> Result<VariableArgs> {
if !self.optional(Token::LPar) {
@@ -2267,6 +2285,17 @@ impl<'a> Parser<'a> {
offset,
}
}
InstructionFormat::LoadComplex => {
let flags = self.optional_memflags();
let args = self.parse_value_sequence()?;
let offset = self.optional_offset32()?;
InstructionData::LoadComplex {
opcode,
flags,
args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists),
offset,
}
}
InstructionFormat::Store => {
let flags = self.optional_memflags();
let arg = self.match_value("expected SSA value operand")?;
@@ -2283,6 +2312,23 @@ impl<'a> Parser<'a> {
offset,
}
}
InstructionFormat::StoreComplex => {
let flags = self.optional_memflags();
let src = self.match_value("expected SSA value operand")?;
self.match_token(
Token::Comma,
"expected ',' between operands",
)?;
let args = self.parse_value_sequence()?;
let offset = self.optional_offset32()?;
InstructionData::StoreComplex {
opcode,
flags,
args: args.into_value_list(&[src], &mut ctx.function.dfg.value_lists),
offset,
}
}
InstructionFormat::RegMove => {
let arg = self.match_value("expected SSA value operand")?;
self.match_token(
@@ -2402,9 +2448,9 @@ impl<'a> Parser<'a> {
#[cfg(test)]
mod tests {
use super::*;
use cretonne_codegen::ir::StackSlotKind;
use cretonne_codegen::ir::entities::AnyEntity;
use cretonne_codegen::ir::types;
use cretonne_codegen::ir::StackSlotKind;
use cretonne_codegen::ir::{ArgumentExtension, ArgumentPurpose};
use cretonne_codegen::settings::CallConv;
use error::Error;