* ABI: implement register arguments with constraints. Currently, Cranelift's ABI code emits a sequence of moves from physical registers into vregs at the top of the function body, one for every register-carried argument. For a number of reasons, we want to move to operand constraints instead, and remove the use of explicitly-named "pinned vregs"; this allows for better regalloc in theory, as it removes the need to "reverse-engineer" the sequence of moves. This PR alters the ABI code so that it generates a single "args" pseudo-instruction as the first instruction in the function body. This pseudo-inst defs all register arguments, and constrains them to the appropriate registers at the def-point. Subsequently the regalloc can move them wherever it needs to. Some care was taken not to have this pseudo-inst show up in post-regalloc disassemblies, but the change did cause a general regalloc "shift" in many tests, so the precise-output updates are a bit noisy. Sorry about that! A subsequent PR will handle the other half of the ABI code, namely, the callsite case, with a similar preg-to-constraint conversion. * Update based on review feedback. * Review feedback.
223 lines
4.0 KiB
Plaintext
test compile precise-output
target s390x

function %snarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
    v2 = snarrow.i64x2 v0, v1
    return v2
}

; block0:
;   vpksg %v24, %v25, %v24
;   br %r14

function %snarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
    v2 = snarrow.i32x4 v0, v1
    return v2
}

; block0:
;   vpksf %v24, %v25, %v24
;   br %r14

function %snarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
    v2 = snarrow.i16x8 v0, v1
    return v2
}

; block0:
;   vpksh %v24, %v25, %v24
;   br %r14

function %unarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
    v2 = unarrow.i64x2 v0, v1
    return v2
}

; block0:
;   vgbm %v4, 0
;   vmxg %v6, %v24, %v4
;   vmxg %v16, %v25, %v4
;   vpklsg %v24, %v16, %v6
;   br %r14

function %unarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
    v2 = unarrow.i32x4 v0, v1
    return v2
}

; block0:
;   vgbm %v4, 0
;   vmxf %v6, %v24, %v4
;   vmxf %v16, %v25, %v4
;   vpklsf %v24, %v16, %v6
;   br %r14

function %unarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
    v2 = unarrow.i16x8 v0, v1
    return v2
}

; block0:
;   vgbm %v4, 0
;   vmxh %v6, %v24, %v4
;   vmxh %v16, %v25, %v4
;   vpklsh %v24, %v16, %v6
;   br %r14

function %uunarrow_i64x2_i32x4(i64x2, i64x2) -> i32x4 wasmtime_system_v {
block0(v0: i64x2, v1: i64x2):
    v2 = uunarrow.i64x2 v0, v1
    return v2
}

; block0:
;   vpklsg %v24, %v25, %v24
;   br %r14

function %uunarrow_i32x4_i16x8(i32x4, i32x4) -> i16x8 wasmtime_system_v {
block0(v0: i32x4, v1: i32x4):
    v2 = uunarrow.i32x4 v0, v1
    return v2
}

; block0:
;   vpklsf %v24, %v25, %v24
;   br %r14

function %uunarrow_i16x8_i8x16(i16x8, i16x8) -> i8x16 wasmtime_system_v {
block0(v0: i16x8, v1: i16x8):
    v2 = uunarrow.i16x8 v0, v1
    return v2
}

; block0:
;   vpklsh %v24, %v25, %v24
;   br %r14

function %swiden_low_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
    v1 = swiden_low.i32x4 v0
    return v1
}

; block0:
;   vuplf %v24, %v24
;   br %r14

function %swiden_low_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
    v1 = swiden_low.i16x8 v0
    return v1
}

; block0:
;   vuplh %v24, %v24
;   br %r14

function %swiden_low_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
    v1 = swiden_low.i8x16 v0
    return v1
}

; block0:
;   vuplb %v24, %v24
;   br %r14

function %swiden_high_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
    v1 = swiden_high.i32x4 v0
    return v1
}

; block0:
;   vuphf %v24, %v24
;   br %r14

function %swiden_high_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
    v1 = swiden_high.i16x8 v0
    return v1
}

; block0:
;   vuphh %v24, %v24
;   br %r14

function %swiden_high_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
    v1 = swiden_high.i8x16 v0
    return v1
}

; block0:
;   vuphb %v24, %v24
;   br %r14

function %uwiden_low_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
    v1 = uwiden_low.i32x4 v0
    return v1
}

; block0:
;   vupllf %v24, %v24
;   br %r14

function %uwiden_low_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
    v1 = uwiden_low.i16x8 v0
    return v1
}

; block0:
;   vupllh %v24, %v24
;   br %r14

function %uwiden_low_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
    v1 = uwiden_low.i8x16 v0
    return v1
}

; block0:
;   vupllb %v24, %v24
;   br %r14

function %uwiden_high_i32x4_i64x2(i32x4) -> i64x2 wasmtime_system_v {
block0(v0: i32x4):
    v1 = uwiden_high.i32x4 v0
    return v1
}

; block0:
;   vuplhf %v24, %v24
;   br %r14

function %uwiden_high_i16x8_i32x4(i16x8) -> i32x4 wasmtime_system_v {
block0(v0: i16x8):
    v1 = uwiden_high.i16x8 v0
    return v1
}

; block0:
;   vuplhh %v24, %v24
;   br %r14

function %uwiden_high_i8x16_i16x8(i8x16) -> i16x8 wasmtime_system_v {
block0(v0: i8x16):
    v1 = uwiden_high.i8x16 v0
    return v1
}

; block0:
;   vuplhb %v24, %v24
;   br %r14