Support big- and little-endian lane order with bitcast (#5196)

Add a MemFlags operand to the bitcast instruction, where only the
`big` and `little` flags are accepted.  These define the lane order
to be used when casting between types of different lane counts.

Update all users to pass an appropriate MemFlags argument.

Implement lane swaps where necessary in the s390x back-end.

This is the final part necessary to fix
https://github.com/bytecodealliance/wasmtime/issues/4566.
This commit is contained in:
Ulrich Weigand
2022-11-07 23:41:10 +01:00
committed by GitHub
parent 5cef53537b
commit 3e5938e65a
16 changed files with 295 additions and 51 deletions

View File

@@ -0,0 +1,79 @@
test compile precise-output
target s390x
;; Bitcast between same-sized integer and reference types is a no-op
;; (the i128 case only copies the value from 0(%r3) to 0(%r2)).
function %bitcast_i8_i8(i8) -> i8 {
;; i8 -> i8 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: i8):
v1 = bitcast.i8 v0
return v1
}
; block0:
; br %r14
function %bitcast_i16_i16(i16) -> i16 {
;; i16 -> i16 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: i16):
v1 = bitcast.i16 v0
return v1
}
; block0:
; br %r14
function %bitcast_i32_i32(i32) -> i32 {
;; i32 -> i32 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: i32):
v1 = bitcast.i32 v0
return v1
}
; block0:
; br %r14
function %bitcast_i64_i64(i64) -> i64 {
;; i64 -> i64 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: i64):
v1 = bitcast.i64 v0
return v1
}
; block0:
; br %r14
function %bitcast_i128_i128(i128) -> i128 {
;; i128 -> i128 bitcast. Unlike the narrower integer cases, the expected output
;; loads a vector register from 0(%r3) and stores it to 0(%r2) before returning.
;; NOTE(review): presumably the i128 argument and result are passed via memory
;; on this ABI, so this is just the copy -- confirm against the s390x ABI code.
block0(v0: i128):
v1 = bitcast.i128 v0
return v1
}
; block0:
; vl %v0, 0(%r3)
; vst %v0, 0(%r2)
; br %r14
function %bitcast_r64_i64(r64) -> i64 {
;; r64 -> i64 bitcast (reference value reinterpreted as an integer); the
;; expected output below contains no instruction other than `br %r14`.
block0(v0: r64):
v1 = bitcast.i64 v0
return v1
}
; block0:
; br %r14
function %bitcast_i64_r64(i64) -> r64 {
;; i64 -> r64 bitcast (integer reinterpreted as a reference value); the
;; expected output below contains no instruction other than `br %r14`.
block0(v0: i64):
v1 = bitcast.r64 v0
return v1
}
; block0:
; br %r14
function %bitcast_r64_r64(r64) -> r64 {
;; r64 -> r64 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: r64):
v1 = bitcast.r64 v0
return v1
}
; block0:
; br %r14

View File

@@ -1200,3 +1200,21 @@ block0(v0: f32):
; vlgvf %r2, %v0, 0
; br %r14
function %bitcast_f32_f32(f32) -> f32 {
;; f32 -> f32 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: f32):
v1 = bitcast.f32 v0
return v1
}
; block0:
; br %r14
function %bitcast_f64_f64(f64) -> f64 {
;; f64 -> f64 bitcast of the sole argument; the expected output below contains
;; no instruction other than the final `br %r14`.
block0(v0: f64):
v1 = bitcast.f64 v0
return v1
}
; block0:
; br %r14

View File

@@ -0,0 +1,76 @@
test compile precise-output
target s390x
;; Vector bitcast is a no-op if the lane count remains unchanged,
;; or if the ABI lane-order matches the specified byte order.
;; Otherwise, lane-swaps need to happen.
function %bitcast_i64x2_i32x4(i64x2) -> i32x4 {
;; Case: no explicit calling convention in the signature, `big` lane order.
;; The lane count changes (2 -> 4), yet the expected output contains no
;; instruction other than `br %r14`, i.e. no lane swap is emitted here.
block0(v0: i64x2):
v1 = bitcast.i32x4 big v0
return v1
}
; block0:
; br %r14
function %bitcast_i64x2_i32x4(i64x2) -> i32x4 {
;; Case: no explicit calling convention in the signature, `little` lane order.
;; The lane count changes (2 -> 4) and the expected output is a three-instruction
;; sequence on %v24 (two `vpdi` followed by `verllg ..., 32`) before returning.
;; NOTE(review): presumably this is the lane swap for the i64x2 input plus the
;; lane swap for the i32x4 result -- confirm against the s390x lowering rules.
block0(v0: i64x2):
v1 = bitcast.i32x4 little v0
return v1
}
; block0:
; vpdi %v3, %v24, %v24, 4
; vpdi %v5, %v3, %v3, 4
; verllg %v24, %v5, 32
; br %r14
function %bitcast_i64x2_i32x4(i64x2) -> i32x4 wasmtime_system_v {
;; Case: `wasmtime_system_v` calling convention, `big` lane order.
;; The expected output is the same three-instruction sequence (two `vpdi`
;; followed by `verllg ..., 32`) that the default-convention `little` case
;; expects, i.e. the roles of `big` and `little` are reversed under this
;; calling convention.
block0(v0: i64x2):
v1 = bitcast.i32x4 big v0
return v1
}
; block0:
; vpdi %v3, %v24, %v24, 4
; vpdi %v5, %v3, %v3, 4
; verllg %v24, %v5, 32
; br %r14
function %bitcast_i64x2_i32x4(i64x2) -> i32x4 wasmtime_system_v {
;; Case: `wasmtime_system_v` calling convention, `little` lane order.
;; The expected output contains no instruction other than `br %r14`, i.e. no
;; lane swap is emitted for this combination.
block0(v0: i64x2):
v1 = bitcast.i32x4 little v0
return v1
}
; block0:
; br %r14
function %bitcast_i64x2_f64x2(i64x2) -> f64x2 {
;; Case: no explicit calling convention in the signature, `big` lane order.
;; Source and result both have two lanes; the expected output contains no
;; instruction other than `br %r14`.
block0(v0: i64x2):
v1 = bitcast.f64x2 big v0
return v1
}
; block0:
; br %r14
function %bitcast_i64x2_f64x2(i64x2) -> f64x2 {
;; Case: no explicit calling convention in the signature, `little` lane order.
;; Source and result both have two lanes; the expected output contains no
;; instruction other than `br %r14`.
block0(v0: i64x2):
v1 = bitcast.f64x2 little v0
return v1
}
; block0:
; br %r14
function %bitcast_i64x2_f64x2(i64x2) -> f64x2 wasmtime_system_v {
;; Case: `wasmtime_system_v` calling convention, `big` lane order.
;; Source and result both have two lanes; the expected output contains no
;; instruction other than `br %r14`.
block0(v0: i64x2):
v1 = bitcast.f64x2 big v0
return v1
}
; block0:
; br %r14

View File

@@ -7,9 +7,9 @@ block0(v0: i32x4):
;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
;; and epilogue.
v1 = bitcast.f32x4 v0
v2 = bitcast.f64x2 v1
v3 = bitcast.i8x16 v2
v1 = bitcast.f32x4 little v0
v2 = bitcast.f64x2 little v1
v3 = bitcast.i8x16 little v2
return v3
}

View File

@@ -43,7 +43,7 @@ block0(v0: f32x4, v1: f32x4, v2: i32x4, v3: i32x4):
function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 {
block0(v0: i8x16, v1: i8x16, v2: i32x4):
v3 = bitcast.i8x16 v2
v3 = bitcast.i8x16 little v2
v4 = bitselect v3, v0, v1
return v4
}