cranelift: Add Bswap instruction (#1092) (#5147)

Adds Bswap to the Cranelift IR. Implements the Bswap instruction
in the x64 and aarch64 codegen backends. Cranelift users can now:
```
builder.ins().bswap(value)
```
to get a native byteswap instruction.

* x64: implements the 32- and 64-bit bswap instruction, following
the pattern set by similar unary instructions (Neg and Not) - it
only operates on a dst register, but is parameterized with both
a src and dst which are expected to be the same register.

As the x64 bswap instruction only exists for 32- and 64-bit registers,
the 16-bit swap is implemented as a rotate left by 8.

Updated the x64 RexFlags type to support emitting single-operand
instructions like bswap.

* aarch64: Bswap gets emitted as the rev16, rev32,
or rev64 instruction, as appropriate.

* s390x: Bswap was already supported in the backend; it just needed
a bit of plumbing.

* For completeness, added bswap to the interpreter as well.

* added filetests and runtests for each ISA

* added bswap to fuzzgen, thanks to afonso360 for the code there

* 128-bit swaps are not yet implemented; that can be done later.
This commit is contained in:
11evan
2022-10-31 12:30:00 -07:00
committed by GitHub
parent 95ecb7e4d4
commit 4ca9e82bd1
24 changed files with 455 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
test compile precise-output
set unwind_info=false
target aarch64
;; 64-bit byte swap. The expected aarch64 lowering that follows this function
;; is a single `rev64` on the x register.
function %f0(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; block0:
; rev64 x0, x0
; ret
;; 32-bit byte swap. The expected aarch64 lowering that follows this function
;; is a single `rev32` on the w register.
function %f1(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; block0:
; rev32 w0, w0
; ret
;; 16-bit byte swap. The expected aarch64 lowering that follows this function
;; is a single `rev16` on the w register.
function %f2(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; block0:
; rev16 w0, w0
; ret

View File

@@ -0,0 +1,34 @@
test compile precise-output
target s390x
;; 64-bit byte swap. The expected s390x lowering that follows this function
;; is a single `lrvgr`.
function %bswap_i64(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; block0:
; lrvgr %r2, %r2
; br %r14
;; 32-bit byte swap. The expected s390x lowering that follows this function
;; is a single `lrvr`.
function %bswap_i32(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; block0:
; lrvr %r2, %r2
; br %r14
;; 16-bit byte swap. The expected s390x lowering that follows this function
;; is a two-instruction sequence: `lrvr` into a temporary register, then
;; `srlk` by 16 into the result register.
function %bswap_i16(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; block0:
; lrvr %r5, %r2
; srlk %r2, %r5, 16
; br %r14

View File

@@ -0,0 +1,48 @@
test compile precise-output
target x86_64
;; 64-bit byte swap. The expected x86_64 output that follows this function
;; moves the argument into %rax and applies `bswapq` with %rax as both
;; source and destination.
function %f0(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; bswapq %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; 32-bit byte swap. The expected x86_64 output that follows this function
;; moves the argument into %rax and applies `bswapl` with %eax as both
;; source and destination.
function %f1(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; bswapl %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;; 16-bit byte swap. The expected x86_64 output that follows this function
;; uses a 16-bit rotate-left by 8 (`rolw $8`) on %ax instead of a bswap
;; instruction.
function %f2(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; rolw $8, %ax, %ax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,58 @@
test interpret
test run
target x86_64
target aarch64
target s390x
; Byte-swaps an i16 value. The run lines after this function cover zero,
; the value 1 (low byte moves to the high byte), a mixed pattern (0x1234),
; and a negative input (-2).
function %bswap_i16(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; run: %bswap_i16(0) == 0
; run: %bswap_i16(1) == 0x0100
; run: %bswap_i16(0x1234) == 0x3412
; run: %bswap_i16(-2) == 0xFEFF
; Byte-swaps an i32 value. The run lines after this function cover zero,
; the value 1 (lowest byte moves to the highest byte), a mixed pattern
; (0x12345678), and a negative input (-2).
function %bswap_i32(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; run: %bswap_i32(0) == 0
; run: %bswap_i32(1) == 0x01000000
; run: %bswap_i32(0x12345678) == 0x78563412
; run: %bswap_i32(-2) == 0xFEFFFFFF
; Byte-swaps an i64 value. The run lines after this function cover zero,
; the value 1 (lowest byte moves to the highest byte), a mixed pattern
; (0x123456789ABCDEF0), and a negative input (-2).
function %bswap_i64(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; run: %bswap_i64(0) == 0
; run: %bswap_i64(1) == 0x0100000000000000
; run: %bswap_i64(0x123456789ABCDEF0) == 0xF0DEBC9A78563412
; run: %bswap_i64(-2) == 0xFEFFFFFFFFFFFFFF
; Byte-swaps a constant i64 and returns it as the third of three results;
; the first two results are zero constants of types i8 and i32.
; NOTE(review): presumably a reduced fuzzer finding, going by the name - confirm.
function %fuzzer_case_0() -> i8, i32, i64 {
block0:
v5 = iconst.i64 0x9903_5204_d05f_abab
v6 = bswap v5
v7 = iconst.i8 0
v8 = iconst.i32 0
return v7, v8, v6
}
; run: %fuzzer_case_0() == [0, 0, 0xabab_5fd0_0452_0399]
; Same body as a constant-i64 byte swap returning (i8 0, i32 0, swapped i64),
; but the function also takes five parameters (f32, f64, i32, i32, f64)
; that the body never reads.
; NOTE(review): presumably a reduced fuzzer finding, going by the name - confirm.
function %fuzzer_case_1(f32, f64, i32, i32, f64) -> i8, i32, i64 {
block0(v0: f32, v1: f64, v2: i32, v3: i32, v4: f64):
v5 = iconst.i64 0x9903_5204_d05f_abab
v6 = bswap v5
v7 = iconst.i8 0
v8 = iconst.i32 0
return v7, v8, v6
}
; run: %fuzzer_case_1(0.0, 0.0, 0, 0, 0.0) == [0, 0, 0xabab_5fd0_0452_0399]

View File

@@ -0,0 +1,12 @@
test interpret
; Byte-swaps an i128 value. The only test directive visible above this
; function is `test interpret`, with no `target` lines. The run lines after
; it cover zero, the value 1, a mixed 128-bit pattern, and a negative
; input (-2).
function %bswap_i128(i128) -> i128 {
block0(v0: i128):
v1 = bswap v0
return v1
}
; run: %bswap_i128(0) == 0
; run: %bswap_i128(1) == 0x01000000_00000000_00000000_00000000
; run: %bswap_i128(0x12345678_9ABCDEF0_CAFEF00D_F00DCAFE) == 0xFECA0DF0_0DF0FECA_F0DEBC9A_78563412
; run: %bswap_i128(-2) == 0xFEFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF