cranelift: Add Bswap instruction (#1092) (#5147)

Adds Bswap to the Cranelift IR. Implements the Bswap instruction in the x64 and aarch64 codegen backends. Cranelift users can now: ``` builder.ins().bswap(value) ``` to get a native byteswap instruction. * x64: implements the 32- and 64-bit bswap instruction, following the pattern set by similar unary instructions (Neg and Not) - it only operates on a dst register, but is parameterized with both a src and dst which are expected to be the same register. As the x64 bswap instruction is only for 32- or 64-bit registers, the 16-bit swap is implemented as a rotate left by 8. Updated the x64 RexFlags type to support emitting for single-operand instructions like bswap. * aarch64: Bswap gets emitted as an aarch64 rev16, rev32, or rev64 instruction as appropriate. * s390x: Bswap was already supported in the backend, just had to add a bit of plumbing. * For completeness, added bswap to the interpreter as well. * added filetests and runtests for each ISA * added bswap to fuzzgen, thanks to afonso360 for the code there * 128-bit swaps are not yet implemented, that can be done later
2022-10-31 12:30:00 -07:00
parent 95ecb7e4d4
commit 4ca9e82bd1
24 changed files with 455 additions and 0 deletions
--- a/cranelift/codegen/src/isa/x64/encoding/rex.rs
+++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs
@@ -105,6 +105,21 @@ impl RexFlags {
        (self.0 & 2) != 0
    }

+    #[inline(always)]
+    pub(crate) fn emit_one_op(&self, sink: &mut MachBuffer<Inst>, enc_e: u8) {
+        // Emits the REX prefix for an instruction whose single register operand
+        // is encoded in the opcode byte itself: REX.R and REX.X stay zero, and
+        // REX.B carries bit 3 of `enc_e` (set when accessing r8-r15).
+        let w = if self.must_clear_w() { 0 } else { 1 };
+        let r = 0;
+        let x = 0;
+        let b = (enc_e >> 3) & 1;
+        let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
+        if rex != 0x40 || self.must_always_emit() {
+            sink.put1(rex);
+        }
+    }
+
    #[inline(always)]
    pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
        let w = if self.must_clear_w() { 0 } else { 1 };
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -151,6 +151,11 @@
       (Setcc (cc CC)
              (dst WritableGpr))

+       ;; Swaps byte order in register
+       (Bswap (size OperandSize) ;; 4 or 8
+              (src Gpr)
+              (dst WritableGpr))
+
       ;; =========================================
       ;; Conditional moves.

@@ -1959,6 +1964,16 @@
 (rule (x64_sar ty src1 src2)
      (shift_r ty (ShiftKind.ShiftRightArithmetic) src1 src2))

+;; Helper for creating byteswap (`MInst.Bswap`) instructions.
+;; On x64 the BSWAP instruction only handles 32- and 64-bit registers;
+;; 16-bit values must instead be swapped with xchg or rol/ror.
+(decl x64_bswap (Type Gpr) Gpr)
+(rule (x64_bswap ty src)
+      (let ((dst WritableGpr (temp_writable_gpr))
+            (size OperandSize (operand_size_of_type_32_64 ty))
+            (_ Unit (emit (MInst.Bswap size src dst))))
+        dst))
+
 ;; Helper for creating `MInst.CmpRmiR` instructions.
 (decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
 (rule (cmp_rmi_r size opcode src1 src2)
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1115,6 +1115,21 @@ pub(crate) fn emit(
            );
        }

+        Inst::Bswap { size, src, dst } => {
+            let src = allocs.next(src.to_reg());
+            let dst = allocs.next(dst.to_reg().to_reg());
+            debug_assert_eq!(src, dst); // in-place op: src and dst must be the same register
+            let enc_reg = int_reg_enc(dst);
+
+            // BSWAP r32 is (REX.W==0) 0F C8+rd
+            // BSWAP r64 is (REX.W==1) 0F C8+rd (low 3 register bits live in the opcode byte)
+            let rex_flags = RexFlags::from(*size);
+            rex_flags.emit_one_op(sink, enc_reg);
+
+            sink.put1(0x0F);
+            sink.put1(0xC8 | (enc_reg & 7));
+        }
+
        Inst::Cmove {
            size,
            cc,
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -107,6 +107,13 @@ impl Inst {
        Inst::Setcc { cc, dst }
    }

+    fn bswap(size: OperandSize, dst: Writable<Reg>) -> Inst {
+        debug_assert!(dst.to_reg().class() == RegClass::Int);
+        let src = Gpr::new(dst.to_reg()).unwrap(); // src is derived from dst: both name the same register
+        let dst = WritableGpr::from_writable_reg(dst).unwrap();
+        Inst::Bswap { size, src, dst }
+    }
+
    fn xmm_rm_r_imm(
        op: SseOpcode,
        src: RegMem,
@@ -3505,6 +3512,55 @@ fn test_x64_emit() {
    insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle   %r14b"));
    insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp    %r9b"));
    insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp   %r8b"));
+
+    // ========================================================
+    // Bswap
+    insns.push((
+        Inst::bswap(OperandSize::Size64, w_rax),
+        "480FC8",
+        "bswapq  %rax, %rax",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size64, w_r8),
+        "490FC8",
+        "bswapq  %r8, %r8",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size32, w_rax),
+        "0FC8",
+        "bswapl  %eax, %eax",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size64, w_rcx),
+        "480FC9",
+        "bswapq  %rcx, %rcx",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size32, w_rcx),
+        "0FC9",
+        "bswapl  %ecx, %ecx",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size64, w_r11),
+        "490FCB",
+        "bswapq  %r11, %r11",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size32, w_r11),
+        "410FCB",
+        "bswapl  %r11d, %r11d",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size64, w_r14),
+        "490FCE",
+        "bswapq  %r14, %r14",
+    ));
+    insns.push((
+        Inst::bswap(OperandSize::Size32, w_r14),
+        "410FCE",
+        "bswapl  %r14d, %r14d",
+    ));
+
    // ========================================================
    // Cmove
    insns.push((
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -68,6 +68,7 @@ impl Inst {
            Inst::AluRmiR { .. }
            | Inst::AluRM { .. }
            | Inst::AtomicRmwSeq { .. }
+            | Inst::Bswap { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::CheckedDivOrRemSeq { .. }
@@ -1373,6 +1374,17 @@ impl PrettyPrint for Inst {
                format!("{} {}", ljustify2("set".to_string(), cc.to_string()), dst)
            }

+            Inst::Bswap { size, src, dst } => {
+                let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs);
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
+                format!(
+                    "{} {}, {}",
+                    ljustify2("bswap".to_string(), suffix_bwlq(*size)),
+                    src,
+                    dst
+                )
+            }
+
            Inst::Cmove {
                size,
                cc,
@@ -1953,6 +1965,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
        Inst::Setcc { dst, .. } => {
            collector.reg_def(dst.to_writable_reg());
        }
+        Inst::Bswap { src, dst, .. } => {
+            collector.reg_use(src.to_reg());
+            collector.reg_reuse_def(dst.to_writable_reg(), 0);
+        }
        Inst::Cmove {
            consequent,
            alternative,
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2065,6 +2065,19 @@
                            hi32)))
        swap32))

+;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The x64 bswap instruction only supports 32- and 64-bit operands, so
+;; the 16-bit swap is implemented as a rotate left by 8 instead.
+(rule (lower (has_type $I16 (bswap src)))
+      (x64_rotl $I16 src (Imm8Reg.Imm8 8)))
+
+(rule (lower (has_type $I32 (bswap src)))
+      (x64_bswap $I32 src))
+
+(rule (lower (has_type $I64 (bswap src)))
+      (x64_bswap $I64 src))
+
 ;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Null references are represented by the constant value `0`.
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -363,6 +363,7 @@ fn lower_insn_to_regs(
        | Opcode::Ctz
        | Opcode::Popcnt
        | Opcode::Bitrev
+        | Opcode::Bswap
        | Opcode::IsNull
        | Opcode::IsInvalid
        | Opcode::Uextend