* Start adding the load_complex and store_complex instructions. N.b.: The text format is not correct yet. Requires changes to the lexer and parser. I'm not sure why I needed to change the RuntimeError to Exception yet. Will fix. * Get first few encodings of load_complex working. Still needs var args type checking. * Clean up ModRM helper functions in binemit. * Implement 32-bit displace for load_complex * Use encoding helpers instead of doing them all by hand * Initial implementation of store_complex * Parse value list for load/store_complex with + as delimiter. Looks nice. * Add sign/zero-extension and size variants for load_complex. * Add size variants of store_complex. * Add asm helper lines to load/store complex bin tests. * Example of length-checking the instruction ValueList for an encoding. Extremely questionable implementation. * Fix Python linting issues * First draft of postopt pass to fold adds and loads into load_complex. Just simple loads for now. * Optimization pass now works with all types of loads. * Add store+add -> store_complex to postopt pass * Put complex address optimization behind ISA flag. * Add load/store complex for f32 and f64 * Fixes changes to lexer that broke NaN parsing. Abstracts away the repeated checks for whether or not the characters following a + or - are going to be parsed as a number or not. * Fix formatting issues * Fix register restrictions for complex addresses. * Encoding tests for x86-32. * Add documentation for newly added instructions, recipes, and cdsl changes. * Fix python formatting again * Apply value-list length predicates to all LoadComplex and StoreComplex instructions. * Add predicate types to new encoding helpers for mypy. * Import FieldPredicate to satisfy mypy. * Add and fix some "asm" strings in the encoding tests. * Line-up 'bin' comments in x86/binary64 test * Test parsing of offset-less store_complex instruction. * 'sNaN' not 'sNan' * Bounds check the lookup for polymorphic typevar operand. 
* Fix encodings for istore16_complex.
242 lines
6.1 KiB
Plaintext
242 lines
6.1 KiB
Plaintext
test cat
|
|
|
|
; The smallest possible function.
|
|
function %minimal() { ; no calling convention is written here; the expected output prints it as "fast"
|
|
ebb0:
|
|
trap user0
|
|
}
|
|
; sameln: function %minimal() fast {
|
|
; nextln: ebb0:
|
|
; nextln: trap user0
|
|
; nextln: }
|
|
|
|
; Create and use values.
|
|
; Polymorphic instructions with type suffix.
|
|
function %ivalues() {
|
|
ebb0:
|
|
v0 = iconst.i32 2
|
|
v1 = iconst.i8 6
|
|
v2 = ishl v0, v1
|
|
}
|
|
; sameln: function %ivalues() fast {
|
|
; nextln: ebb0:
|
|
; nextln: v0 = iconst.i32 2
|
|
; nextln: v1 = iconst.i8 6
|
|
; nextln: v2 = ishl v0, v1
|
|
; nextln: }
|
|
|
|
; Create and use values.
|
|
; Polymorphic instructions with type suffix.
|
|
function %bvalues() {
|
|
ebb0:
|
|
v0 = bconst.b32 true
|
|
v1 = bconst.b8 false
|
|
v2 = bextend.b32 v1
|
|
v3 = bxor v0, v2
|
|
}
|
|
; sameln: function %bvalues() fast {
|
|
; nextln: ebb0:
|
|
; nextln: v0 = bconst.b32 true
|
|
; nextln: v1 = bconst.b8 false
|
|
; nextln: v2 = bextend.b32 v1
|
|
; nextln: v3 = bxor v0, v2
|
|
; nextln: }
|
|
|
|
; Polymorphic instruction controlled by second operand.
|
|
function %select() {
|
|
ebb0(v90: i32, v91: i32, v92: b1):
|
|
v0 = select v92, v90, v91
|
|
}
|
|
; sameln: function %select() fast {
|
|
; nextln: ebb0(v90: i32, v91: i32, v92: b1):
|
|
; nextln: v0 = select v92, v90, v91
|
|
; nextln: }
|
|
|
|
; Polymorphic instruction controlled by third operand.
|
|
function %selectif() system_v {
|
|
ebb0(v95: i32, v96: i32, v97: b1):
|
|
v98 = selectif.i32 eq v97, v95, v96
|
|
}
|
|
; sameln: function %selectif() system_v {
|
|
; nextln: ebb0(v95: i32, v96: i32, v97: b1):
|
|
; nextln: v98 = selectif.i32 eq v97, v95, v96
|
|
; nextln: }
|
|
|
|
; Lane indexes.
|
|
function %lanes() {
|
|
ebb0:
|
|
v0 = iconst.i32x4 2
|
|
v1 = extractlane v0, 3
|
|
v2 = insertlane v0, 1, v1
|
|
}
|
|
; sameln: function %lanes() fast {
|
|
; nextln: ebb0:
|
|
; nextln: v0 = iconst.i32x4 2
|
|
; nextln: v1 = extractlane v0, 3
|
|
; nextln: v2 = insertlane v0, 1, v1
|
|
; nextln: }
|
|
|
|
; Integer condition codes.
|
|
function %icmp(i32, i32) {
|
|
ebb0(v90: i32, v91: i32):
|
|
v0 = icmp eq v90, v91
|
|
v1 = icmp ult v90, v91
|
|
v2 = icmp_imm sge v90, -12
|
|
v3 = irsub_imm v91, 45
|
|
br_icmp eq v90, v91, ebb0(v91, v90)
|
|
}
|
|
; sameln: function %icmp(i32, i32) fast {
|
|
; nextln: ebb0(v90: i32, v91: i32):
|
|
; nextln: v0 = icmp eq v90, v91
|
|
; nextln: v1 = icmp ult v90, v91
|
|
; nextln: v2 = icmp_imm sge v90, -12
|
|
; nextln: v3 = irsub_imm v91, 45
|
|
; nextln: br_icmp eq v90, v91, ebb0(v91, v90)
|
|
; nextln: }
|
|
|
|
; Floating condition codes.
|
|
function %fcmp(f32, f32) {
|
|
ebb0(v90: f32, v91: f32):
|
|
v0 = fcmp eq v90, v91
|
|
v1 = fcmp uno v90, v91
|
|
v2 = fcmp lt v90, v91
|
|
}
|
|
; sameln: function %fcmp(f32, f32) fast {
|
|
; nextln: ebb0(v90: f32, v91: f32):
|
|
; nextln: v0 = fcmp eq v90, v91
|
|
; nextln: v1 = fcmp uno v90, v91
|
|
; nextln: v2 = fcmp lt v90, v91
|
|
; nextln: }
|
|
|
|
; The bitcast instruction has two type variables: The controlling type variable
|
|
; controls the output type, and the input type is a free variable.
|
|
function %bitcast(i32, f32) {
|
|
ebb0(v90: i32, v91: f32):
|
|
v0 = bitcast.i8x4 v90
|
|
v1 = bitcast.i32 v91
|
|
}
|
|
; sameln: function %bitcast(i32, f32) fast {
|
|
; nextln: ebb0(v90: i32, v91: f32):
|
|
; nextln: v0 = bitcast.i8x4 v90
|
|
; nextln: v1 = bitcast.i32 v91
|
|
; nextln: }
|
|
|
|
; Stack slot references
|
|
function %stack() {
|
|
ss10 = spill_slot 8 ; declared first on purpose; the expected output lists ss10 after ss2..ss5
|
|
ss2 = explicit_slot 4
|
|
ss3 = incoming_arg 4, offset 8
|
|
ss4 = outgoing_arg 4
|
|
ss5 = emergency_slot 4
|
|
|
|
ebb0:
|
|
v1 = stack_load.i32 ss10
|
|
v2 = stack_load.i32 ss10+4
|
|
stack_store v1, ss10+2
|
|
stack_store v2, ss2
|
|
}
|
|
; sameln: function %stack() fast {
|
|
; check: ss2 = explicit_slot 4
|
|
; check: ss3 = incoming_arg 4, offset 8
|
|
; check: ss4 = outgoing_arg 4
|
|
; check: ss5 = emergency_slot 4
|
|
; check: ss10 = spill_slot 8
|
|
|
|
; check: ebb0:
|
|
; nextln: v1 = stack_load.i32 ss10
|
|
; nextln: v2 = stack_load.i32 ss10+4
|
|
; nextln: stack_store v1, ss10+2
|
|
; nextln: stack_store v2, ss2
|
|
|
|
; Memory access instructions.
|
|
function %memory(i32) {
|
|
ebb0(v1: i32):
|
|
v2 = load.i64 v1
|
|
v3 = load.i64 aligned v1
|
|
v4 = load.i64 notrap v1
|
|
v5 = load.i64 notrap aligned v1
|
|
v6 = load.i64 aligned notrap v1 ; flags in the opposite order; the expected output prints "notrap aligned"
|
|
v7 = load.i64 v1-12
|
|
v8 = load.i64 notrap v1+0x1_0000 ; the expected output spells this offset as 0x0001_0000
|
|
v9 = load_complex.i64 v1+v2
|
|
v10 = load_complex.i64 v1+v2+0x1 ; the expected output spells this offset as decimal 1
|
|
store v2, v1
|
|
store aligned v3, v1+12
|
|
store notrap aligned v3, v1-12
|
|
store_complex v3, v1+v2
|
|
store_complex v3, v1+v2+0x1 ; the expected output spells this offset as decimal 1
|
|
}
|
|
; sameln: function %memory(i32) fast {
|
|
; nextln: ebb0(v1: i32):
|
|
; nextln: v2 = load.i64 v1
|
|
; nextln: v3 = load.i64 aligned v1
|
|
; nextln: v4 = load.i64 notrap v1
|
|
; nextln: v5 = load.i64 notrap aligned v1
|
|
; nextln: v6 = load.i64 notrap aligned v1
|
|
; nextln: v7 = load.i64 v1-12
|
|
; nextln: v8 = load.i64 notrap v1+0x0001_0000
|
|
; nextln: v9 = load_complex.i64 v1+v2
|
|
; nextln: v10 = load_complex.i64 v1+v2+1
|
|
; nextln: store v2, v1
|
|
; nextln: store aligned v3, v1+12
|
|
; nextln: store notrap aligned v3, v1-12
|
|
; nextln: store_complex v3, v1+v2
|
|
; nextln: store_complex v3, v1+v2+1
|
|
|
|
; Register diversions.
|
|
; This test file has no ISA, so we can only use register unit numbers.
|
|
function %diversion(i32) {
|
|
ss0 = spill_slot 4
|
|
|
|
ebb0(v1: i32):
|
|
regmove v1, %10 -> %20
|
|
regmove v1, %20 -> %10
|
|
regspill v1, %10 -> ss0
|
|
regfill v1, ss0 -> %10
|
|
return
|
|
}
|
|
; sameln: function %diversion(i32) fast {
|
|
; nextln: ss0 = spill_slot 4
|
|
; check: ebb0(v1: i32):
|
|
; nextln: regmove v1, %10 -> %20
|
|
; nextln: regmove v1, %20 -> %10
|
|
; nextln: regspill v1, %10 -> ss0
|
|
; nextln: regfill v1, ss0 -> %10
|
|
; nextln: return
|
|
; nextln: }
|
|
|
|
; Register copies.
|
|
function %copy_special() {
|
|
ebb0:
|
|
copy_special %10 -> %20
|
|
copy_special %20 -> %10
|
|
return
|
|
}
|
|
; sameln: function %copy_special() fast {
|
|
; nextln: ebb0:
|
|
; nextln: copy_special %10 -> %20
|
|
; nextln: copy_special %20 -> %10
|
|
; nextln: return
|
|
; nextln: }
|
|
|
|
; Conditional traps (trapz, trapif, trapff).
function %cond_traps(i32) {
|
|
ebb0(v0: i32):
|
|
trapz v0, stk_ovf
|
|
v1 = ifcmp_imm v0, 5
|
|
trapif ugt v1, oob
|
|
v2 = bitcast.f32 v1
|
|
v3 = ffcmp v2, v2
|
|
trapff uno v3, int_ovf
|
|
return
|
|
}
|
|
; sameln: function %cond_traps(i32)
|
|
; nextln: ebb0(v0: i32):
|
|
; nextln: trapz v0, stk_ovf
|
|
; nextln: v1 = ifcmp_imm v0, 5
|
|
; nextln: trapif ugt v1, oob
|
|
; nextln: v2 = bitcast.f32 v1
|
|
; nextln: v3 = ffcmp v2, v2
|
|
; nextln: trapff uno v3, int_ovf
|
|
; nextln: return
|
|
; nextln: }
|