cranelift: Add Bswap instruction (#1092) (#5147)

Adds Bswap to the Cranelift IR. Implements the Bswap instruction
in the x64 and aarch64 codegen backends. Cranelift users can now:
```
builder.ins().bswap(value)
```
to get a native byteswap instruction.

* x64: implements the 32- and 64-bit bswap instruction, following
the pattern set by similar unary instructions (Neg and Not) - it
only operates on a dst register, but is parameterized with both
a src and dst which are expected to be the same register.

As the x64 bswap instruction only exists for 32- and 64-bit registers,
the 16-bit swap is implemented as a rotate left by 8.

Updated x64 RexFlags type to support emitting for single-operand
instructions like bswap

* aarch64: Bswap gets emitted as aarch64 rev16, rev32,
or rev64 instruction as appropriate.

* s390x: Bswap was already supported in backend, just had to add
a bit of plumbing

* For completeness, added bswap to the interpreter as well.

* added filetests and runtests for each ISA

* added bswap to fuzzgen, thanks to afonso360 for the code there

* 128-bit swaps are not yet implemented in the native backends (only the interpreter handles them); that can be done later
This commit is contained in:
11evan
2022-10-31 12:30:00 -07:00
committed by GitHub
parent 95ecb7e4d4
commit 4ca9e82bd1
24 changed files with 455 additions and 0 deletions

View File

@@ -643,6 +643,12 @@ pub(crate) fn define(
TypeSetBuilder::new().ints(Interval::All).build(),
);
let iSwappable = &TypeVar::new(
"iSwappable",
"A multi byte scalar integer type",
TypeSetBuilder::new().ints(16..128).build(),
);
let iAddr = &TypeVar::new(
"iAddr",
"An integer address type",
@@ -2699,6 +2705,23 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
let x = &Operand::new("x", iSwappable);
let a = &Operand::new("a", iSwappable);
ig.push(
Inst::new(
"bswap",
r#"
Reverse the byte order of an integer.
Reverses the bytes in ``x``.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let x = &Operand::new("x", Int);
let a = &Operand::new("a", Int);

View File

@@ -1023,6 +1023,10 @@
(RBit)
(Clz)
(Cls)
;; Byte reverse
(Rev16)
(Rev32)
(Rev64)
))
(type MemLabel extern (enum))
@@ -2579,6 +2583,17 @@
(decl a64_cls (Type Reg) Reg)
(rule (a64_cls ty x) (bit_rr (BitOp.Cls) ty x))
;; Helpers for generating `rev` instructions
(decl a64_rev16 (Type Reg) Reg)
(rule (a64_rev16 ty x) (bit_rr (BitOp.Rev16) ty x))
(decl a64_rev32 (Type Reg) Reg)
(rule (a64_rev32 ty x) (bit_rr (BitOp.Rev32) ty x))
(decl a64_rev64 (Type Reg) Reg)
(rule (a64_rev64 ty x) (bit_rr (BitOp.Rev64) ty x))
;; Helpers for generating `eon` instructions.
(decl eon (Type Reg Reg) Reg)

View File

@@ -934,6 +934,9 @@ impl MachInstEmit for Inst {
BitOp::RBit => (0b00000, 0b000000),
BitOp::Clz => (0b00000, 0b000100),
BitOp::Cls => (0b00000, 0b000101),
BitOp::Rev16 => (0b00000, 0b000001),
BitOp::Rev32 => (0b00000, 0b000010),
BitOp::Rev64 => (0b00000, 0b000011),
};
sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
}

View File

@@ -1375,6 +1375,61 @@ fn test_aarch64_binemit() {
"cls x21, x16",
));
insns.push((
Inst::BitRR {
op: BitOp::Rev16,
size: OperandSize::Size64,
rd: writable_xreg(2),
rn: xreg(11),
},
"6205C0DA",
"rev16 x2, x11",
));
insns.push((
Inst::BitRR {
op: BitOp::Rev16,
size: OperandSize::Size32,
rd: writable_xreg(3),
rn: xreg(21),
},
"A306C05A",
"rev16 w3, w21",
));
insns.push((
Inst::BitRR {
op: BitOp::Rev32,
size: OperandSize::Size64,
rd: writable_xreg(2),
rn: xreg(11),
},
"6209C0DA",
"rev32 x2, x11",
));
insns.push((
Inst::BitRR {
op: BitOp::Rev32,
size: OperandSize::Size32,
rd: writable_xreg(3),
rn: xreg(21),
},
"A30AC05A",
"rev32 w3, w21",
));
insns.push((
Inst::BitRR {
op: BitOp::Rev64,
size: OperandSize::Size64,
rd: writable_xreg(1),
rn: xreg(10),
},
"410DC0DA",
"rev64 x1, x10",
));
insns.push((
Inst::ULoad8 {
rd: writable_xreg(1),

View File

@@ -67,6 +67,9 @@ impl BitOp {
BitOp::RBit => "rbit",
BitOp::Clz => "clz",
BitOp::Cls => "cls",
BitOp::Rev16 => "rev16",
BitOp::Rev32 => "rev32",
BitOp::Rev64 => "rev64",
}
}
}

View File

@@ -1517,6 +1517,17 @@
(rule -1 (lower (has_type ty (cls x)))
(a64_cls ty x))
;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I16 (bswap x)))
(a64_rev16 $I16 x))
(rule (lower (has_type $I32 (bswap x)))
(a64_rev32 $I32 x))
(rule (lower (has_type $I64 (bswap x)))
(a64_rev64 $I64 x))
;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bmask tests the value against zero, and uses `csetm` to assert the result.

View File

@@ -96,6 +96,8 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz => implemented_in_isle(ctx),
Opcode::Bswap => implemented_in_isle(ctx),
Opcode::Popcnt => implemented_in_isle(ctx),
Opcode::Load

View File

@@ -1188,6 +1188,18 @@
7 6 5 4 3 2 1 0))))
;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The 16-bit case swaps the value as a 32-bit quantity and then shifts the
;; result right by 16 bits.
(rule (lower (has_type $I16 (bswap x)))
(lshr_imm $I32 (bswap_reg $I32 x) 16))
;; 32-bit values are swapped directly with `bswap_reg`.
(rule (lower (has_type $I32 (bswap x)))
(bswap_reg $I32 x))
;; 64-bit values are swapped directly with `bswap_reg`.
(rule (lower (has_type $I64 (bswap x)))
(bswap_reg $I64 x))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The FLOGR hardware instruction always operates on the full 64-bit register.

View File

@@ -100,6 +100,7 @@ impl LowerBackend for S390xBackend {
| Opcode::Vselect
| Opcode::Bmask
| Opcode::Bitrev
| Opcode::Bswap
| Opcode::Clz
| Opcode::Cls
| Opcode::Ctz

View File

@@ -105,6 +105,21 @@ impl RexFlags {
(self.0 & 2) != 0
}
#[inline(always)]
/// Emits the REX prefix for an instruction whose single register operand is
/// encoded in the opcode byte itself.
///
/// `enc_e` is the hardware encoding of that register; bit 3 of it becomes
/// REX.B. REX.R and REX.X are always zero for this form. The prefix byte is
/// written only when it differs from the bare `0x40` base or when these flags
/// say it must always be emitted.
pub(crate) fn emit_one_op(&self, sink: &mut MachBuffer<Inst>, enc_e: u8) {
    const REX_BASE: u8 = 0x40;

    // REX.W lives in bit 3 of the prefix.
    let w_bits: u8 = if self.must_clear_w() { 0 } else { 1 << 3 };
    // REX.B lives in bit 0 and carries the high bit of the register encoding.
    let b_bits: u8 = (enc_e >> 3) & 1;

    let prefix = REX_BASE | w_bits | b_bits;
    let is_required = prefix != REX_BASE || self.must_always_emit();
    if is_required {
        sink.put1(prefix);
    }
}
#[inline(always)]
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
let w = if self.must_clear_w() { 0 } else { 1 };

View File

@@ -151,6 +151,11 @@
(Setcc (cc CC)
(dst WritableGpr))
;; Swaps byte order in register
(Bswap (size OperandSize) ;; 4 or 8
(src Gpr)
(dst WritableGpr))
;; =========================================
;; Conditional moves.
@@ -1959,6 +1964,16 @@
(rule (x64_sar ty src1 src2)
(shift_r ty (ShiftKind.ShiftRightArithmetic) src1 src2))
;; Helper for creating byteswap instructions.
;; In x64, 32- and 64-bit registers use BSWAP instruction, and
;; for 16-bit registers one must instead use xchg or rol/ror
(decl x64_bswap (Type Gpr) Gpr)
(rule (x64_bswap ty src)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(_ Unit (emit (MInst.Bswap size src dst))))
dst))
;; Helper for creating `MInst.CmpRmiR` instructions.
(decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
(rule (cmp_rmi_r size opcode src1 src2)

View File

@@ -1115,6 +1115,21 @@ pub(crate) fn emit(
);
}
// Emit a byte-swap of a single register.
Inst::Bswap { size, src, dst } => {
// Resolve both operands through the allocation stream.
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
// Only `dst` is encoded below, so the source must have been given the
// same register.
debug_assert_eq!(src, dst);
let enc_reg = int_reg_enc(dst);
// BSWAP reg32 is (REX.W==0) 0F C8
// BSWAP reg64 is (REX.W==1) 0F C8
let rex_flags = RexFlags::from(*size);
rex_flags.emit_one_op(sink, enc_reg);
// Two opcode bytes follow; the low three bits of the register encoding
// are OR-ed into the second one.
sink.put1(0x0F);
sink.put1(0xC8 | (enc_reg & 7));
}
Inst::Cmove {
size,
cc,

View File

@@ -107,6 +107,13 @@ impl Inst {
Inst::Setcc { cc, dst }
}
/// Builds an `Inst::Bswap` that uses the same integer register as both its
/// source and its destination.
///
/// `dst` must be an integer-class register (checked in debug builds).
fn bswap(size: OperandSize, dst: Writable<Reg>) -> Inst {
    let reg = dst.to_reg();
    debug_assert!(reg.class() == RegClass::Int);
    Inst::Bswap {
        size,
        // The source is the destination register viewed read-only.
        src: Gpr::new(reg).unwrap(),
        dst: WritableGpr::from_writable_reg(dst).unwrap(),
    }
}
fn xmm_rm_r_imm(
op: SseOpcode,
src: RegMem,
@@ -3505,6 +3512,55 @@ fn test_x64_emit() {
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b"));
insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b"));
// ========================================================
// Bswap
insns.push((
Inst::bswap(OperandSize::Size64, w_rax),
"480FC8",
"bswapq %rax, %rax",
));
insns.push((
Inst::bswap(OperandSize::Size64, w_r8),
"490FC8",
"bswapq %r8, %r8",
));
insns.push((
Inst::bswap(OperandSize::Size32, w_rax),
"0FC8",
"bswapl %eax, %eax",
));
insns.push((
Inst::bswap(OperandSize::Size64, w_rcx),
"480FC9",
"bswapq %rcx, %rcx",
));
insns.push((
Inst::bswap(OperandSize::Size32, w_rcx),
"0FC9",
"bswapl %ecx, %ecx",
));
insns.push((
Inst::bswap(OperandSize::Size64, w_r11),
"490FCB",
"bswapq %r11, %r11",
));
insns.push((
Inst::bswap(OperandSize::Size32, w_r11),
"410FCB",
"bswapl %r11d, %r11d",
));
insns.push((
Inst::bswap(OperandSize::Size64, w_r14),
"490FCE",
"bswapq %r14, %r14",
));
insns.push((
Inst::bswap(OperandSize::Size32, w_r14),
"410FCE",
"bswapl %r14d, %r14d",
));
// ========================================================
// Cmove
insns.push((

View File

@@ -68,6 +68,7 @@ impl Inst {
Inst::AluRmiR { .. }
| Inst::AluRM { .. }
| Inst::AtomicRmwSeq { .. }
| Inst::Bswap { .. }
| Inst::CallKnown { .. }
| Inst::CallUnknown { .. }
| Inst::CheckedDivOrRemSeq { .. }
@@ -1373,6 +1374,17 @@ impl PrettyPrint for Inst {
format!("{} {}", ljustify2("set".to_string(), cc.to_string()), dst)
}
// Render a byte-swap as `bswap<suffix> src, dst`.
Inst::Bswap { size, src, dst } => {
// Both operands are printed at the register width selected by `size`.
let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
// The mnemonic is "bswap" followed by the size suffix from `suffix_bwlq`.
format!(
"{} {}, {}",
ljustify2("bswap".to_string(), suffix_bwlq(*size)),
src,
dst
)
}
Inst::Cmove {
size,
cc,
@@ -1953,6 +1965,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
Inst::Setcc { dst, .. } => {
collector.reg_def(dst.to_writable_reg());
}
// Byte-swap: one use and one def that reuses it.
Inst::Bswap { src, dst, .. } => {
// `src` is registered first, making it operand 0.
collector.reg_use(src.to_reg());
// `dst` is declared as a def that reuses operand 0 (presumably so the
// allocator assigns it the same register as `src` — confirm against
// the collector's documentation).
collector.reg_reuse_def(dst.to_writable_reg(), 0);
}
Inst::Cmove {
consequent,
alternative,

View File

@@ -2065,6 +2065,19 @@
hi32)))
swap32))
;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; x64 bswap instruction is only for 32- or 64-bit swaps
;; implement the 16-bit swap as a rotl by 8
(rule (lower (has_type $I16 (bswap src)))
(x64_rotl $I16 src (Imm8Reg.Imm8 8)))
(rule (lower (has_type $I32 (bswap src)))
(x64_bswap $I32 src))
(rule (lower (has_type $I64 (bswap src)))
(x64_bswap $I64 src))
;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Null references are represented by the constant value `0`.

View File

@@ -363,6 +363,7 @@ fn lower_insn_to_regs(
| Opcode::Ctz
| Opcode::Popcnt
| Opcode::Bitrev
| Opcode::Bswap
| Opcode::IsNull
| Opcode::IsInvalid
| Opcode::Uextend

View File

@@ -0,0 +1,34 @@
test compile precise-output
set unwind_info=false
target aarch64
function %f0(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; block0:
; rev64 x0, x0
; ret
function %f1(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; block0:
; rev32 w0, w0
; ret
function %f2(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; block0:
; rev16 w0, w0
; ret

View File

@@ -0,0 +1,34 @@
test compile precise-output
target s390x
function %bswap_i64(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; block0:
; lrvgr %r2, %r2
; br %r14
function %bswap_i32(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; block0:
; lrvr %r2, %r2
; br %r14
function %bswap_i16(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; block0:
; lrvr %r5, %r2
; srlk %r2, %r5, 16
; br %r14

View File

@@ -0,0 +1,48 @@
test compile precise-output
target x86_64
function %f0(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; bswapq %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f1(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; bswapl %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; rolw $8, %ax, %ax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -0,0 +1,58 @@
test interpret
test run
target x86_64
target aarch64
target s390x
function %bswap_i16(i16) -> i16 {
block0(v0: i16):
v1 = bswap v0
return v1
}
; run: %bswap_i16(0) == 0
; run: %bswap_i16(1) == 0x0100
; run: %bswap_i16(0x1234) == 0x3412
; run: %bswap_i16(-2) == 0xFEFF
function %bswap_i32(i32) -> i32 {
block0(v0: i32):
v1 = bswap v0
return v1
}
; run: %bswap_i32(0) == 0
; run: %bswap_i32(1) == 0x01000000
; run: %bswap_i32(0x12345678) == 0x78563412
; run: %bswap_i32(-2) == 0xFEFFFFFF
function %bswap_i64(i64) -> i64 {
block0(v0: i64):
v1 = bswap v0
return v1
}
; run: %bswap_i64(0) == 0
; run: %bswap_i64(1) == 0x0100000000000000
; run: %bswap_i64(0x123456789ABCDEF0) == 0xF0DEBC9A78563412
; run: %bswap_i64(-2) == 0xFEFFFFFFFFFFFFFF
function %fuzzer_case_0() -> i8, i32, i64 {
block0:
v5 = iconst.i64 0x9903_5204_d05f_abab
v6 = bswap v5
v7 = iconst.i8 0
v8 = iconst.i32 0
return v7, v8, v6
}
; run: %fuzzer_case_0() == [0, 0, 0xabab_5fd0_0452_0399]
function %fuzzer_case_1(f32, f64, i32, i32, f64) -> i8, i32, i64 {
block0(v0: f32, v1: f64, v2: i32, v3: i32, v4: f64):
v5 = iconst.i64 0x9903_5204_d05f_abab
v6 = bswap v5
v7 = iconst.i8 0
v8 = iconst.i32 0
return v7, v8, v6
}
; run: %fuzzer_case_1(0.0, 0.0, 0, 0, 0.0) == [0, 0, 0xabab_5fd0_0452_0399]

View File

@@ -0,0 +1,12 @@
test interpret
function %bswap_i128(i128) -> i128 {
block0(v0: i128):
v1 = bswap v0
return v1
}
; run: %bswap_i128(0) == 0
; run: %bswap_i128(1) == 0x01000000_00000000_00000000_00000000
; run: %bswap_i128(0x12345678_9ABCDEF0_CAFEF00D_F00DCAFE) == 0xFECA0DF0_0DF0FECA_F0DEBC9A_78563412
; run: %bswap_i128(-2) == 0xFEFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF

View File

@@ -671,6 +671,13 @@ const OPCODE_SIGNATURES: &'static [(
(Opcode::Bmask, &[I32], &[I128], insert_opcode),
(Opcode::Bmask, &[I64], &[I128], insert_opcode),
(Opcode::Bmask, &[I128], &[I128], insert_opcode),
// Bswap is a unary instruction: one input operand and one result of the
// same type, like the other unary entries in this table.
(Opcode::Bswap, &[I16], &[I16], insert_opcode),
(Opcode::Bswap, &[I32], &[I32], insert_opcode),
(Opcode::Bswap, &[I64], &[I64], insert_opcode),
// I128 version not yet implemented on x86_64 and aarch64.
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
(Opcode::Bswap, &[I128], &[I128], insert_opcode),
// Fadd
(Opcode::Fadd, &[F32, F32], &[F32], insert_opcode),
(Opcode::Fadd, &[F64, F64], &[F64], insert_opcode),

View File

@@ -806,6 +806,7 @@ where
Opcode::UshrImm => binary_unsigned(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?,
Opcode::SshrImm => binary(Value::ishr, arg(0)?, imm_as_ctrl_ty()?)?,
Opcode::Bitrev => assign(Value::reverse_bits(arg(0)?)?),
Opcode::Bswap => assign(Value::swap_bytes(arg(0)?)?),
Opcode::Clz => assign(arg(0)?.leading_zeros()?),
Opcode::Cls => {
let count = if Value::lt(&arg(0)?, &Value::int(0, ctrl_ty)?)? {

View File

@@ -86,6 +86,7 @@ pub trait Value: Clone + From<DataValue> {
fn leading_zeros(self) -> ValueResult<Self>;
fn trailing_zeros(self) -> ValueResult<Self>;
fn reverse_bits(self) -> ValueResult<Self>;
fn swap_bytes(self) -> ValueResult<Self>;
}
#[derive(Error, Debug, PartialEq)]
@@ -716,4 +717,8 @@ impl Value for DataValue {
fn reverse_bits(self) -> ValueResult<Self> {
unary_match!(reverse_bits(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
}
/// Reverses the byte order of an integer value.
///
/// Applies `swap_bytes` to the 16-, 32-, 64- and 128-bit signed and unsigned
/// integer variants listed in the macro call. The 8-bit variants are not
/// listed; NOTE(review): presumably `unary_match!` reports an error for any
/// variant not in the list — confirm against the macro definition.
fn swap_bytes(self) -> ValueResult<Self> {
unary_match!(swap_bytes(&self); [I16, I32, I64, I128, U16, U32, U64, U128])
}
}